From: 肖盛文 Date: Fri, 22 May 2020 17:08:29 +0000 (+0800) Subject: New upstream version 1.1.1 X-Git-Tag: archive/raspbian/1.1.9+ds1-4+rpi1^2^2~145^2 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks:///%22http:/www.example.com/cgi/%22https:/%22bookmarks:/?a=commitdiff_plain;h=15074fe3ec479394a9ced01f6f04e06cad314c0b;p=opencc.git New upstream version 1.1.1 --- diff --git a/.appveyor.yml b/.appveyor.yml index 62e682b..5450aeb 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -39,8 +39,11 @@ for: - SET arch=%platform% - IF "%platform%"=="x86" SET arch=Win32 - - cmake -A%arch% -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. + - cmake -A%arch% -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. -DENABLE_GTEST:BOOL=ON -DENABLE_BENCHMARK:BOOL=ON -DCMAKE_BUILD_TYPE=Release - cmake --build build --config Release --target install + test_script: + - cd build + - ctest --verbose -C Release after_build: - 7z a OpenCC.zip build/bin build/include build/lib build/share artifacts: diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index d675932..9d030cb 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -27,7 +27,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest + pip install flake8 pytest wheel if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Lint with flake8 run: | diff --git a/.npmignore b/.npmignore index 882a576..26cf29c 100644 --- a/.npmignore +++ b/.npmignore @@ -3,9 +3,22 @@ CMakeLists.txt *.cmake *.pyc +*.cmd +*.tgz +/.github +/.vscode +/.appveyor.yml +/.clang-format +/.travis.yml +/Makefile +/src/*Test.cpp +/src/*TestBase.cpp /doc /data/scheme +/deps/google-benchmark +/deps/gtest-1.11.0 +/deps/tclap-1.2.2 /build /debug /release @@ -14,9 +27,11 @@ CMakeLists.txt /doc/html /opencc.xcodeproj /python +/src/benchmark /test/benchmark /test/dict.ocd /test/dict.txt /test/dict.bin 
+/test/CommandLineConvertTest.cpp /node_modules /xcode diff --git a/AUTHORS b/AUTHORS index 83317af..52c080e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,5 +1,5 @@ Author: -BYVoid +Carbo Kuo Contributors: Peng Huang diff --git a/CMakeLists.txt b/CMakeLists.txt index bb2a4f2..05fff42 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ # # Open Chinese Convert # -# Copyright 2010-2020 BYVoid +# Copyright 2010-2020 Carbo Kuo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ set (PACKAGE_URL https://github.com/BYVoid/Opencc) set (PACKAGE_BUGREPORT https://github.com/BYVoid/Opencc/issues) set (OPENCC_VERSION_MAJOR 1) set (OPENCC_VERSION_MINOR 1) -set (OPENCC_VERSION_REVISION 0) +set (OPENCC_VERSION_REVISION 1) if (CMAKE_BUILD_TYPE MATCHES Debug) set (version_suffix .Debug) @@ -63,7 +63,9 @@ include(CPack) ######## Mac OS X -set(CMAKE_MACOSX_RPATH 1) +if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + set(CMAKE_MACOSX_RPATH 1) +endif() ######## Directory diff --git a/Makefile b/Makefile index 3320ccc..a60074f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # # Open Chinese Convert # -# Copyright 2010-2020 BYVoid +# Copyright 2010-2020 Carbo Kuo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -80,24 +80,21 @@ xcode-build: python-build: cd python; python setup.py build -python-install: +python-install: python-build cd python; python setup.py install -python-develop: - cd python; python setup.py develop - python-test: python-build - cd python; python setup.py test + cd python; pytest . 
test-all: test node-test python-test format: - find "src" "node" -iname "*.hpp" -o -iname "*.cpp" -o -iname "*.cc" \ + find "src" "node" "test" -iname "*.hpp" -o -iname "*.cpp" -o -iname "*.cc" \ -o -iname "*.c" -o -iname "*.h" \ | xargs clang-format -i clean: - rm -rf build xcode + rm -rf build xcode python/opencc/clib install: build make -C build/rel install VERBOSE=${VERBOSE} PREFIX=${PREFIX} diff --git a/NEWS.md b/NEWS.md index bccee7c..767b006 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,15 @@ # Change History of OpenCC +## Version 1.1.1 + +2020年5月22日 + +* 正式提供[Python](https://pypi.org/project/OpenCC/)接口和TypeScript類型標註。 +* 更新動態鏈接庫`SOVERSION`到`1.1`,由於C++內部接口發生變更。 +* 進一步改進與Windows MSVC的兼容性。 +* 簡化頭文件結構,加快編譯速度。刪除不必要的`using`。 +* 修復部分香港標準字。 + ## Version 1.1.0 2020年5月10日 diff --git a/README.md b/README.md index a9ff383..b244e41 100644 --- a/README.md +++ b/README.md @@ -34,27 +34,60 @@ Warning: **This is NOT an API.** You will be banned if you make calls programmat https://opencc.byvoid.com/ -### Command Line - -* `opencc --help` -* `opencc_dict --help` -* `opencc_phrase_extract --help` - ### Node.js +[npm](https://www.npmjs.com/opencc) `npm i install opencc` + +#### JavaScript ```js const OpenCC = require('opencc'); -const opencc = new OpenCC('s2t.json'); -opencc.convertPromise("汉字").then(converted => { +const converter = new OpenCC('s2t.json'); +converter.convertPromise("汉字").then(converted => { console.log(converted); // 漢字 }); ``` -See [demo.js](https://github.com/BYVoid/OpenCC/blob/master/node/demo.js). +#### TypeScript +```ts +import { OpenCC } from 'opencc'; +async function main() { + const converter: OpenCC = new OpenCC('s2t.json'); + const result: string = await converter.convertPromise('汉字'); + console.log(result); +} +``` + +See [demo.js](https://github.com/BYVoid/OpenCC/blob/master/node/demo.js) and [ts-demo.ts](https://github.com/BYVoid/OpenCC/blob/master/node/ts-demo.ts). 
+ +### Python + +[PyPI](https://pypi.org/project/OpenCC/) `pip install opencc` (Windows, Linux, Mac) + +```python +import opencc +converter = opencc.OpenCC('s2t.json') +converter.convert('汉字') # 漢字 +``` + +### C++ -### C++ Document 文檔 +```c++ +#include "opencc.h" -https://byvoid.github.io/OpenCC/ +int main() { + const SimpleConverter converter("s2t.json"); + converter.Convert("汉字"); // 漢字 + return 0; +} +``` + +Document 文檔: https://byvoid.github.io/OpenCC/ + +### Command Line + +* `opencc --help` +* `opencc_dict --help` +* `opencc_phrase_extract --help` ### Others (Unofficial) @@ -62,8 +95,6 @@ https://byvoid.github.io/OpenCC/ * Java: [opencc4j](https://github.com/houbb/opencc4j) * Android: [android-opencc](https://github.com/qichuan/android-opencc) * PHP: [opencc4php](https://github.com/nauxliu/opencc4php) -* Python (Reimplementation): [opencc-python](https://github.com/yichen0831/opencc-python) -* Python (C++ binding): [opencc-python](https://github.com/lepture/opencc-python) * WebAssembly: [wasm-opencc](https://github.com/oyyd/wasm-opencc) ### Configurations 配置文件 @@ -87,25 +118,34 @@ https://byvoid.github.io/OpenCC/ ### Build with CMake -Linux (g++ 4.6 is required) and Mac OS X (clang 3.2 is required): +#### Linux & Mac OS X + +g++ 4.6+ or clang 3.2+ is required. ```bash make ``` -Windows Visual Studio: +#### Windows Visual Studio: ```bash -cmake -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. -cmake --build build --config Release --target install +build.cmd ``` ### Test 測試 +#### Linux & Mac OS X + ``` make test ``` +#### Windows Visual Studio: + +```bash +test.cmd +``` + ### Benchmark 基準測試 ``` diff --git a/build.cmd b/build.cmd new file mode 100644 index 0000000..d627a48 --- /dev/null +++ b/build.cmd @@ -0,0 +1,2 @@ +cmake -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. 
+cmake --build build --config Release --target install diff --git a/data/dictionary/HKVariants.txt b/data/dictionary/HKVariants.txt index 5654448..1ac9c87 100644 --- a/data/dictionary/HKVariants.txt +++ b/data/dictionary/HKVariants.txt @@ -56,11 +56,14 @@ 蘊 藴 蛻 蜕 衆 眾 +衕 同 +衚 胡 衛 衞 覈 核 說 説 贗 贋 踊 踴 +蹟 跡 轀 輼 醞 醖 鉢 缽 diff --git a/data/dictionary/STPhrases.txt b/data/dictionary/STPhrases.txt index 945e187..346d4ab 100644 --- a/data/dictionary/STPhrases.txt +++ b/data/dictionary/STPhrases.txt @@ -4511,8 +4511,8 @@ 修曼德 修曼德 修杰楷 修杰楷 修枝 修枝 -修桥舖路 修橋舖路 修桥补路 修橋補路 +修桥铺路 修橋鋪路 修樾 脩樾 修正 修正 修正为 修正爲 @@ -10258,7 +10258,6 @@ 卷舌元音 捲舌元音 卷舌音 捲舌音 卷舒 卷舒 -卷舖盖 捲舖蓋 卷菸 捲菸 卷落叶 捲落葉 卷衣袖 捲衣袖 @@ -21961,7 +21960,6 @@ 当罏红袖 當罏紅袖 当者披靡 當者披靡 当耳边风 當耳邊風 -当舖 當舖 当艄拿舵 當艄拿舵 当艄顺 當艄順 当花 當花 @@ -22174,7 +22172,7 @@ 彩色缤纷 彩色繽紛 彩虹 彩虹 彩虹仙子 彩虹仙子 -彩虹冰舖 彩虹冰舖 +彩虹冰铺 彩虹冰鋪 彩虹桥 彩虹橋 彩蛋 彩蛋 彩蝶 彩蝶 @@ -41028,7 +41026,6 @@ 药膏 藥膏 药膛 藥膛 药膳 藥膳 -药舖 藥舖 药茶 藥茶 药草 藥草 药草茶 藥草茶 diff --git a/data/dictionary/TWPhrasesIT.txt b/data/dictionary/TWPhrasesIT.txt index ea120c0..4a80ad7 100644 --- a/data/dictionary/TWPhrasesIT.txt +++ b/data/dictionary/TWPhrasesIT.txt @@ -321,6 +321,7 @@ U盤 隨身碟 變量 變數 軟件 軟體 軟驅 軟碟機 +轉義字符 跳脫字元 通信 通訊 通訊卡 通話卡 通配符 萬用字元 diff --git a/data/icon/opencc.svg b/data/icon/opencc.svg new file mode 100644 index 0000000..cff146d --- /dev/null +++ b/data/icon/opencc.svg @@ -0,0 +1,39 @@ + + + +]> + + + + + + + diff --git a/node/demo.js b/node/demo.js index c3890aa..af099eb 100644 --- a/node/demo.js +++ b/node/demo.js @@ -5,7 +5,7 @@ * @license * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/node/global.gypi b/node/global.gypi index 14bd740..8640f39 100644 --- a/node/global.gypi +++ b/node/global.gypi @@ -1,6 +1,6 @@ { "variables": { - "opencc_version": "1.1.0" + "opencc_version": "1.1.1" }, "target_defaults": { "defines": [ diff --git a/node/opencc.cc b/node/opencc.cc index a9a89f0..8f89ca6 100644 --- a/node/opencc.cc +++ b/node/opencc.cc @@ -14,6 +14,7 @@ #include "DictConverter.cpp" #include "DictEntry.cpp" #include "DictGroup.cpp" +#include "Lexicon.cpp" #include "MarisaDict.cpp" #include "MaxMatchSegmentation.cpp" #include "Segmentation.cpp" @@ -23,16 +24,16 @@ using namespace opencc; -string ToUtf8String(const v8::Local& val) { +std::string ToUtf8String(const v8::Local& val) { Nan::Utf8String utf8(val); - return string(*utf8); + return std::string(*utf8); } class OpenccBinding : public Nan::ObjectWrap { struct ConvertRequest { OpenccBinding* instance; - string input; - string output; + std::string input; + std::string output; Nan::Callback* callback; Optional ex; @@ -44,12 +45,14 @@ class OpenccBinding : public Nan::ObjectWrap { const ConverterPtr converter_; public: - explicit OpenccBinding(const string configFileName) + explicit OpenccBinding(const std::string configFileName) : config_(), converter_(config_.NewFromFile(configFileName)) {} virtual ~OpenccBinding() {} - string Convert(const string& input) { return converter_->Convert(input); } + std::string Convert(const std::string& input) { + return converter_->Convert(input); + } static NAN_METHOD(Version) { info.GetReturnValue().Set(Nan::New(VERSION).ToLocalChecked()); @@ -60,7 +63,7 @@ public: try { if (info.Length() >= 1 && info[0]->IsString()) { - const string configFile = ToUtf8String(info[0]); + const std::string configFile = ToUtf8String(info[0]); instance = new OpenccBinding(configFile); } else { instance = new OpenccBinding("s2t.json"); @@ -129,8 +132,8 @@ public: OpenccBinding* instance = Nan::ObjectWrap::Unwrap(info.This()); - const string input = ToUtf8String(info[0]); 
- string output; + const std::string input = ToUtf8String(info[0]); + std::string output; try { output = instance->Convert(input); } catch (opencc::Exception& e) { @@ -148,10 +151,10 @@ public: Nan::ThrowTypeError("Wrong arguments"); return; } - const string inputFileName = ToUtf8String(info[0]); - const string outputFileName = ToUtf8String(info[1]); - const string formatFrom = ToUtf8String(info[2]); - const string formatTo = ToUtf8String(info[3]); + const std::string inputFileName = ToUtf8String(info[0]); + const std::string outputFileName = ToUtf8String(info[1]); + const std::string formatFrom = ToUtf8String(info[2]); + const std::string formatTo = ToUtf8String(info[3]); try { opencc::ConvertDictionary(inputFileName, outputFileName, formatFrom, formatTo); diff --git a/node/opencc.d.ts b/node/opencc.d.ts new file mode 100644 index 0000000..4349f84 --- /dev/null +++ b/node/opencc.d.ts @@ -0,0 +1,9 @@ +declare class OpenCC { + constructor(config: string); + version(): string; + generateDict(inputFileName: string, outputFileName: string, formatFrom: string, formatTo: string): void; + convert(input: string, callback: (err: string, convertedText: string) => void): string; + convertSync(input: string): string; + convertPromise(input: string): Promise; +} +export { OpenCC }; diff --git a/node/opencc.js b/node/opencc.js index ecc82d0..f96bfcb 100644 --- a/node/opencc.js +++ b/node/opencc.js @@ -5,7 +5,7 @@ * @license * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,6 +55,9 @@ const OpenCC = module.exports = function (config) { this.handler = new binding.Opencc(config); }; +// This is to support both CommonJS and ES module. +OpenCC.OpenCC = OpenCC; + /** * The version of OpenCC library. 
* @@ -73,7 +76,6 @@ OpenCC.version = binding.Opencc.version(); * @param outputFileName Output dictionary filename. * @param formatFrom Input dictionary format. * @param formatTo Input dictionary format. - * @return Converted text. * @ingroup node_api */ OpenCC.generateDict = function (inputFileName, outputFileName, diff --git a/node/ts-demo.ts b/node/ts-demo.ts new file mode 100644 index 0000000..7d9e03e --- /dev/null +++ b/node/ts-demo.ts @@ -0,0 +1,31 @@ +/** + * @file + * Example of Node.js API. + * + * @license + * Open Chinese Convert + * + * Copyright 2010-2020 Carbo Kuo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { OpenCC } from './opencc'; + +async function main() { + const converter: OpenCC = new OpenCC('s2t.json'); + const result: string = await converter.convertPromise('汉字'); + console.log(result); +} + +main(); diff --git a/package-lock.json b/package-lock.json index 307bd5b..4f310d0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "opencc", - "version": "1.0.6", + "version": "1.1.1", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -473,9 +473,9 @@ "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=" }, "fsevents": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.1.2.tgz", - "integrity": "sha512-R4wDiBwZ0KzpgOWetKDug1FZcYhqYnUYKtfZYt4mD5SBz76q0KR4Q9o7GIPamsVPGmW3EYPPJ0dOOjvx32ldZA==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.1.3.tgz", + "integrity": "sha512-Auw9a4AxqWpa9GUfj370BMPzzyncfBABW8Mab7BGWBYDj4Isgq+cDKtx0i6u9jcX9pQDnswsaaOTgTmA5pEjuQ==", "dev": true, "optional": true }, @@ -844,9 +844,9 @@ } }, "mocha": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-7.1.1.tgz", - "integrity": "sha512-3qQsu3ijNS3GkWcccT5Zw0hf/rWvu1fTN9sPvEd81hlwsr30GX2GcDSSoBxo24IR8FelmrAydGC6/1J5QQP4WA==", + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-7.1.2.tgz", + "integrity": "sha512-o96kdRKMKI3E8U0bjnfqW4QMk12MwZ4mhdBTf+B5a1q9+aq2HRnj+3ZdJu0B/ZhJeK78MgYuv6L8d/rA5AeBJA==", "dev": true, "requires": { "ansi-colors": "3.2.3", @@ -862,7 +862,7 @@ "js-yaml": "3.13.1", "log-symbols": "3.0.0", "minimatch": "3.0.4", - "mkdirp": "0.5.3", + "mkdirp": "0.5.5", "ms": "2.1.1", "node-environment-flags": "1.0.6", "object.assign": "4.1.0", @@ -875,15 +875,6 @@ "yargs-unparser": "1.6.0" }, "dependencies": { - "debug": { - "version": "3.2.6", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", - "integrity": 
"sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==", - "dev": true, - "requires": { - "ms": "^2.1.1" - } - }, "glob": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz", @@ -898,15 +889,6 @@ "path-is-absolute": "^1.0.0" } }, - "mkdirp": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.3.tgz", - "integrity": "sha512-P+2gwrFqx8lhew375MQHHeTlY8AuOJSrGf0R5ddkEndUkmwpgUob/vQuBD1V22/Cw1/lJr4x+EjllSezBThzBg==", - "dev": true, - "requires": { - "minimist": "^1.2.5" - } - }, "ms": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", @@ -922,9 +904,9 @@ "dev": true }, "nan": { - "version": "2.14.0", - "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.0.tgz", - "integrity": "sha512-INOFj37C7k3AfaNTtX8RhsTw7qRy7eLET14cROi9+5HAVbbHuIWUHEauBv5qT4Av2tWasiTY1Jw6puUNqRJXQg==" + "version": "2.14.1", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.1.tgz", + "integrity": "sha512-isWHgVjnFjh2x2yuJ/tj3JbwoHu3UC2dX5G/88Cm24yB6YopVgxvBObDY7n5xW6ExmFhJpSEQqFPvq9zaXc8Jw==" }, "needle": { "version": "2.4.1", diff --git a/package.json b/package.json index 24ed462..ab67ded 100644 --- a/package.json +++ b/package.json @@ -1,10 +1,11 @@ { "name": "opencc", - "version": "1.1.0-1", + "version": "1.1.1", "description": "Conversion between Traditional and Simplified Chinese", - "author": "BYVoid ", + "author": "Carbo Kuo ", "license": "Apache-2.0", "main": "node/opencc.js", + "types": "node/opencc.d.ts", "scripts": { "test": "mocha -R spec node/test.js", "deploy": "node-pre-gyp package && (node-pre-gyp-github publish --release || exit 0)", @@ -36,11 +37,11 @@ "Traditional Chinese" ], "devDependencies": { - "mocha": "^7.1.1", + "mocha": "^7.1.2", "node-pre-gyp-github": "^1.4.3" }, "dependencies": { - "nan": "^2.14.0", + "nan": "^2.14.1", "node-pre-gyp": "^0.14.0" } } diff --git a/python/.gitignore b/python/.gitignore index 659d0c1..a262cac 
100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -101,6 +101,3 @@ dmypy.json # Pyre type checker .pyre/ - -# Generated files -opencc/version.py diff --git a/python/opencc/.gitignore b/python/opencc/.gitignore new file mode 100644 index 0000000..8840e76 --- /dev/null +++ b/python/opencc/.gitignore @@ -0,0 +1,2 @@ +version.py +clib/ diff --git a/python/opencc/__init__.py b/python/opencc/__init__.py index 1d457cb..79b7970 100644 --- a/python/opencc/__init__.py +++ b/python/opencc/__init__.py @@ -9,7 +9,6 @@ import os import platform import sys from ctypes import CDLL, c_char_p, c_size_t, c_void_p, cast -from ctypes.util import find_library try: from opencc.version import __version__ # noqa @@ -23,19 +22,22 @@ else: __all__ = ['CONFIGS', 'convert', 'OpenCC'] -_libcfile = find_library('c') or 'libc.so.6' -libc = CDLL(_libcfile, use_errno=True) -libc.free.argtypes = [c_void_p] _thisdir = os.path.dirname(os.path.abspath(__file__)) _system = platform.system() if _system == 'Darwin': - _libopenccfilename = 'libopencc.2.dylib' + _libopenccfilename = 'libopencc.1.1.dylib' elif _system == 'Linux': - _libopenccfilename = 'libopencc.so.2' + _libopenccfilename = 'libopencc.so.1.1' +elif _system == 'Windows': + _libopenccfilename = 'opencc.dll' else: raise NotImplementedError('Not tested for {}'.format(_system)) -_libopenccfile = os.path.join(_thisdir, 'clib', 'lib', _libopenccfilename) + +if _system == 'Windows': + _libopenccfile = os.path.join(_thisdir, 'clib', 'bin', _libopenccfilename) +else: + _libopenccfile = os.path.join(_thisdir, 'clib', 'lib', _libopenccfilename) libopencc = None if os.path.isfile(_libopenccfile): @@ -43,7 +45,11 @@ if os.path.isfile(_libopenccfile): libopencc.opencc_open.restype = c_void_p libopencc.opencc_convert_utf8.argtypes = [c_void_p, c_char_p, c_size_t] libopencc.opencc_convert_utf8.restype = c_void_p + libopencc.opencc_convert_utf8_free.argtypes = [c_char_p] + libopencc.opencc_convert_utf8_free.restype = c_void_p 
libopencc.opencc_close.argtypes = [c_void_p] + libopencc.opencc_error.argtypes = [] + libopencc.opencc_error.restype = c_char_p _opencc_share_dir = os.path.join(_thisdir, 'clib', 'share', 'opencc') CONFIGS = [] @@ -66,7 +72,12 @@ class OpenCC(object): if not os.path.isfile(config): raise ValueError('Could not find file at {}'.format(config)) - self._od = libopencc.opencc_open(c_char_p(config.encode('utf-8'))) + od = libopencc.opencc_open(c_char_p(config.encode('utf-8'))) + if cast(od, c_void_p) == -1: + error = libopencc.opencc_error() + raise Exception(error.value) + + self._od = od def convert(self, text): if isinstance(text, text_type): @@ -75,11 +86,14 @@ class OpenCC(object): retv_i = libopencc.opencc_convert_utf8(self._od, text, len(text)) if retv_i == -1: - raise Exception('OpenCC Convert Error') + error = libopencc.opencc_error() + raise Exception(error.value) + retv_c = cast(retv_i, c_char_p) value = retv_c.value - libc.free(retv_c) + libopencc.opencc_convert_utf8_free(retv_c) return value.decode('utf-8') def __del__(self): - libopencc.opencc_close(self._od) + if hasattr(self, '_od'): + libopencc.opencc_close(self._od) diff --git a/python/opencc/clib/.gitignore b/python/opencc/clib/.gitignore deleted file mode 100644 index 355164c..0000000 --- a/python/opencc/clib/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*/ diff --git a/python/setup.py b/python/setup.py index 46f5616..55a69a7 100644 --- a/python/setup.py +++ b/python/setup.py @@ -4,12 +4,11 @@ import os import re import subprocess import sys +import warnings import setuptools import setuptools.command.build_py -import setuptools.command.develop -import setuptools.command.install -import setuptools.command.test +import wheel.bdist_wheel from opencc import _libopenccfile @@ -38,7 +37,8 @@ def get_version_info(): version_info[1] = match.group(2) elif match.group(1) == 'REVISION': version_info[2] = match.group(2) - return '.'.join(version_info) + version = '.'.join(version_info) + return version def 
write_version_file(version_info): @@ -77,30 +77,61 @@ def build_libopencc(): if os.path.isfile(_libopenccfile): return # Skip building binary file - print('building libopencc') - assert subprocess.call('command -v make', shell=True) == 0, \ - 'Build requires `make`' - assert subprocess.call('command -v cmake', shell=True) == 0, \ - 'Build requires `cmake`' - # Probably also needs to check for cpp-compilier - - errno = subprocess.call(( - 'mkdir -p {build_dir};' - 'cmake ' - '-B {build_dir} ' - '-DBUILD_DOCUMENTATION:BOOL=OFF ' - '-DENABLE_GTEST:BOOL=OFF ' - '-DCMAKE_BUILD_TYPE=Release ' - '-DCMAKE_INSTALL_PREFIX={clib_dir} ' - '..;' - 'make -C {build_dir} -j;' - 'make -C {build_dir} install;' - ).format( - build_dir=_build_dir, - clib_dir=_clib_dir - ), shell=True) - - assert errno == 0, 'Build failed' + print('building libopencc into %s' % _build_dir) + + def build_on_windows(): + subprocess.call('md %s' % _build_dir, shell=True) + cmd = ( + 'cmake ' + '-B {build_dir} ' + '-DBUILD_DOCUMENTATION:BOOL=OFF ' + '-DENABLE_GTEST:BOOL=OFF ' + '-DCMAKE_BUILD_TYPE=Release ' + '-DCMAKE_INSTALL_PREFIX={clib_dir} ' + '..' 
+ ).format( + build_dir=_build_dir, + clib_dir=_clib_dir + ) + errno = subprocess.call(cmd, shell=True) + assert errno == 0, 'Configure failed' + cmd = ( + 'cmake --build {build_dir} --config Release --target install' + ).format( + build_dir=_build_dir + ) + errno = subprocess.call(cmd, shell=True) + assert errno == 0, 'Build failed' + + def build_on_posix(): + assert subprocess.call('command -v make', shell=True) == 0, \ + 'Build requires `make`' + assert subprocess.call('command -v cmake', shell=True) == 0, \ + 'Build requires `cmake`' + # Probably also needs to check for cpp-compilier + + errno = subprocess.call(( + 'mkdir -p {build_dir};' + 'cmake ' + '-B {build_dir} ' + '-DBUILD_DOCUMENTATION:BOOL=OFF ' + '-DENABLE_GTEST:BOOL=OFF ' + '-DCMAKE_BUILD_TYPE=Release ' + '-DCMAKE_INSTALL_PREFIX={clib_dir} ' + '..;' + 'make -C {build_dir} -j;' + 'make -C {build_dir} install;' + ).format( + build_dir=_build_dir, + clib_dir=_clib_dir + ), shell=True) + + assert errno == 0, 'Build failed' + + if sys.platform == 'win32': + build_on_windows() + else: + build_on_posix() assert os.path.isfile(_libopenccfile) @@ -110,23 +141,35 @@ class BuildPyCommand(setuptools.command.build_py.build_py, object): super(BuildPyCommand, self).run() -class InstallCommand(setuptools.command.install.install, object): - def run(self): - build_libopencc() - super(InstallCommand, self).run() +class BDistWheelCommand(wheel.bdist_wheel.bdist_wheel, object): + """Custom bdsit_wheel command that will change + default plat-name based on PEP 425 and PEP 513 + """ + @staticmethod + def _determine_platform_tag(): + if sys.platform == 'win32': + if 'amd64' in sys.version.lower(): + return 'win-amd64' + return sys.platform -class DevelopCommand(setuptools.command.develop.develop, object): - def run(self): - build_libopencc() - super(DevelopCommand, self).run() + if sys.platform == 'darwin': + _, _, _, _, machine = os.uname() + return 'macosx-10.9-{}'.format(machine) + + if os.name == 'posix': + _, _, _, _, 
machine = os.uname() + return 'manylinux1-{}'.format(machine) + warnings.warn( + 'Windows macos and linux are all not detected, ' + 'Proper distribution name cannot be determined.') + from distutils.util import get_platform + return get_platform() -class PyTestCommand(setuptools.command.test.test): - def run_tests(self): - import pytest - errno = pytest.main([]) - sys.exit(errno) + def initialize_options(self): + super(BDistWheelCommand, self).initialize_options() + self.plat_name = self._determine_platform_tag() version_info = get_version_info() @@ -135,7 +178,7 @@ write_version_file(version_info) author_info = get_author_info() setuptools.setup( - name='opencc-py', + name='OpenCC', version=version_info, author=author_info[0], author_email=author_info[1], @@ -146,20 +189,16 @@ setuptools.setup( packages=['opencc'], package_data={str('opencc'): [ + 'clib/bin/*.dll', 'clib/include/opencc/*', 'clib/lib/libopencc.*', 'clib/share/opencc/*', ]}, cmdclass={ 'build_py': BuildPyCommand, - 'install': InstallCommand, - 'develop': DevelopCommand, - 'test': PyTestCommand, + 'bdist_wheel': BDistWheelCommand }, - tests_require=['pytest'], - test_suite='tests', - classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', @@ -175,5 +214,5 @@ setuptools.setup( 'Topic :: Software Development :: Localization', ], license='Apache License 2.0', - keywords='opencc convert chinese' + keywords=['opencc', 'convert', 'chinese'] ) diff --git a/src/BinaryDict.cpp b/src/BinaryDict.cpp index beaccf4..2e66fd2 100644 --- a/src/BinaryDict.cpp +++ b/src/BinaryDict.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2020 BYVoid + * Copyright 2010-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,10 @@ * limitations under the License. 
*/ +#include +#include +#include + #include "BinaryDict.hpp" #include "Lexicon.hpp" @@ -30,8 +34,8 @@ size_t BinaryDict::KeyMaxLength() const { } void BinaryDict::SerializeToFile(FILE* fp) const { - string keyBuf, valueBuf; - vector keyOffsets, valueOffsets; + std::string keyBuf, valueBuf; + std::vector keyOffsets, valueOffsets; size_t keyTotalLength = 0, valueTotalLength = 0; ConstructBuffer(keyBuf, keyOffsets, keyTotalLength, valueBuf, valueOffsets, valueTotalLength); @@ -114,7 +118,7 @@ BinaryDictPtr BinaryDict::NewFromFile(FILE* fp) { } std::string key = dict->keyBuffer.c_str() + keyOffset; // Value offset - vector values; + std::vector values; for (size_t j = 0; j < numValues; j++) { size_t valueOffset; unitsRead = fread(&valueOffset, sizeof(size_t), 1, fp); @@ -131,9 +135,10 @@ BinaryDictPtr BinaryDict::NewFromFile(FILE* fp) { return dict; } -void BinaryDict::ConstructBuffer(string& keyBuf, vector& keyOffset, - size_t& keyTotalLength, string& valueBuf, - vector& valueOffset, +void BinaryDict::ConstructBuffer(std::string& keyBuf, + std::vector& keyOffset, + size_t& keyTotalLength, std::string& valueBuf, + std::vector& valueOffset, size_t& valueTotalLength) const { keyTotalLength = 0; valueTotalLength = 0; diff --git a/src/BinaryDict.hpp b/src/BinaryDict.hpp index ae415f5..7c23268 100644 --- a/src/BinaryDict.hpp +++ b/src/BinaryDict.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,12 +42,12 @@ public: private: LexiconPtr lexicon; - string keyBuffer; - string valueBuffer; + std::string keyBuffer; + std::string valueBuffer; - void ConstructBuffer(string& keyBuffer, vector& keyOffset, - size_t& keyTotalLength, string& valueBuffer, - vector& valueOffset, + void ConstructBuffer(std::string& keyBuffer, std::vector& keyOffset, + size_t& keyTotalLength, std::string& valueBuffer, + std::vector& valueOffset, size_t& valueTotalLength) const; }; } // namespace opencc diff --git a/src/BinaryDictTest.cpp b/src/BinaryDictTest.cpp index 109b3e1..471221a 100644 --- a/src/BinaryDictTest.cpp +++ b/src/BinaryDictTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015-2020 BYVoid + * Copyright 2015-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ protected: : binDict(new BinaryDict(textDict->GetLexicon())), fileName("dict.bin"){}; const BinaryDictPtr binDict; - const string fileName; + const std::string fileName; }; TEST_F(BinaryDictTest, Serialization) { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e31bd1e..4883990 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -44,6 +44,7 @@ set( DictConverter.cpp DictEntry.cpp DictGroup.cpp + Lexicon.cpp MarisaDict.cpp MaxMatchSegmentation.cpp PhraseExtract.cpp @@ -113,9 +114,9 @@ set_target_properties( OUTPUT_NAME opencc VERSION - 1.0.0 + 1.1.1 SOVERSION - 2 + 1.1 ) # Installation @@ -135,10 +136,26 @@ install( # Gtest if (ENABLE_GTEST) + if (WIN32) + add_custom_target( + copy_gtest_to_src + ${CMAKE_COMMAND} -E copy $ ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Copy gtest" + ) + add_custom_target( + copy_gtest_main_to_src + ${CMAKE_COMMAND} -E copy $ ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Copy gtest_main" + ) + endif() + foreach(TESTCASE ${UNITTESTS}) add_executable(${TESTCASE} ${TESTCASE}.cpp) target_link_libraries(${TESTCASE} gtest gtest_main libopencc) 
add_test(${TESTCASE} ${TESTCASE}) + if (WIN32) + add_dependencies(${TESTCASE} copy_gtest_to_src copy_gtest_main_to_src) + endif() endforeach() endif() diff --git a/src/CmdLineOutput.hpp b/src/CmdLineOutput.hpp index 33fcb55..80419eb 100644 --- a/src/CmdLineOutput.hpp +++ b/src/CmdLineOutput.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/Common.hpp b/src/Common.hpp index 2315fae..8d4e2f0 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,31 +23,14 @@ #pragma warning(disable : 4251 4266 4350 4503 4512 4514 4710 4820) #endif -#include -#include -#include -#include -#include -#include +#include #include -#include #include #include -#include -#include -#include -#include -#include - -#include "Exception.hpp" #include "Export.hpp" #include "Optional.hpp" -using std::list; -using std::string; -using std::vector; - // Forward decalarations and alias namespace opencc { class Config; @@ -88,9 +71,9 @@ typedef std::shared_ptr DartsDictPtr; } // namespace opencc #ifndef PKGDATADIR -const string PACKAGE_DATA_DIRECTORY = ""; +const std::string PACKAGE_DATA_DIRECTORY = ""; #else // ifndef PKGDATADIR -const string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/"; +const std::string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/"; #endif // ifndef PKGDATADIR #ifndef VERSION diff --git a/src/Config.cpp b/src/Config.cpp index 61a2d51..2b144a8 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, 
Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ * limitations under the License. */ +#include +#include #include #include "document.h" @@ -24,6 +26,7 @@ #include "ConversionChain.hpp" #include "Converter.hpp" #include "DictGroup.hpp" +#include "Exception.hpp" #include "MarisaDict.hpp" #include "MaxMatchSegmentation.hpp" #include "TextDict.hpp" @@ -40,14 +43,15 @@ namespace { class ConfigInternal { public: - string configDirectory; + std::string configDirectory; std::unordered_map< - string, std::unordered_map>> + std::string, + std::unordered_map>> dictCache; const JSONValue& GetProperty(const JSONValue& doc, const char* name) { if (!doc.HasMember(name)) { - throw InvalidFormat("Required property not found: " + string(name)); + throw InvalidFormat("Required property not found: " + std::string(name)); } return doc[name]; } @@ -55,7 +59,7 @@ public: const JSONValue& GetObjectProperty(const JSONValue& doc, const char* name) { const JSONValue& obj = GetProperty(doc, name); if (!obj.IsObject()) { - throw InvalidFormat("Property must be an object: " + string(name)); + throw InvalidFormat("Property must be an object: " + std::string(name)); } return obj; } @@ -63,7 +67,7 @@ public: const JSONValue& GetArrayProperty(const JSONValue& doc, const char* name) { const JSONValue& obj = GetProperty(doc, name); if (!obj.IsArray()) { - throw InvalidFormat("Property must be an array: " + string(name)); + throw InvalidFormat("Property must be an array: " + std::string(name)); } return obj; } @@ -71,12 +75,14 @@ public: const char* GetStringProperty(const JSONValue& doc, const char* name) { const JSONValue& obj = GetProperty(doc, name); if (!obj.IsString()) { - throw InvalidFormat("Property must be a string: " + string(name)); + throw InvalidFormat("Property must be a std::string: " + + std::string(name)); } return obj.GetString(); } - template DictPtr LoadDictWithPaths(const string& fileName) { + template + DictPtr 
LoadDictWithPaths(const std::string& fileName) { // Working directory std::shared_ptr dict; if (SerializableDict::TryLoadFromFile(fileName, &dict)) { @@ -96,7 +102,8 @@ public: throw FileNotFound(fileName); } - DictPtr LoadDictFromFile(const string& type, const string& fileName) { + DictPtr LoadDictFromFile(const std::string& type, + const std::string& fileName) { if (type == "text") { return LoadDictWithPaths(fileName); } @@ -114,10 +121,10 @@ public: DictPtr ParseDict(const JSONValue& doc) { // Required: type - string type = GetStringProperty(doc, "type"); + std::string type = GetStringProperty(doc, "type"); if (type == "group") { - list dicts; + std::list dicts; const JSONValue& docs = GetArrayProperty(doc, "dicts"); for (rapidjson::SizeType i = 0; i < docs.Size(); i++) { if (docs[i].IsObject()) { @@ -129,7 +136,7 @@ public: } return DictGroupPtr(new DictGroup(dicts)); } else { - string fileName = GetStringProperty(doc, "file"); + std::string fileName = GetStringProperty(doc, "file"); // Read from cache DictPtr& cache = dictCache[type][configDirectory][fileName]; if (cache != nullptr) { @@ -147,7 +154,7 @@ public: SegmentationPtr segmentation; // Required: type - string type = GetStringProperty(doc, "type"); + std::string type = GetStringProperty(doc, "type"); if (type == "mmseg") { // Required: dict DictPtr dict = ParseDict(GetObjectProperty(doc, "dict")); @@ -167,7 +174,7 @@ public: } ConversionChainPtr ParseConversionChain(const JSONValue& docs) { - list conversions; + std::list conversions; for (rapidjson::SizeType i = 0; i < docs.Size(); i++) { const JSONValue& doc = docs[i]; if (doc.IsObject()) { @@ -180,7 +187,7 @@ public: return chain; } - string FindConfigFile(string fileName) { + std::string FindConfigFile(std::string fileName) { std::ifstream ifs; // Working directory @@ -190,7 +197,7 @@ public: } // Package data directory if (PACKAGE_DATA_DIRECTORY != "") { - string prefixedFileName = PACKAGE_DATA_DIRECTORY + fileName; + std::string prefixedFileName 
= PACKAGE_DATA_DIRECTORY + fileName; ifs.open(UTF8Util::GetPlatformString(prefixedFileName).c_str()); if (ifs.is_open()) { return prefixedFileName; @@ -210,26 +217,26 @@ Config::Config() : internal(new ConfigInternal()) {} Config::~Config() { delete (ConfigInternal*)internal; } -ConverterPtr Config::NewFromFile(const string& fileName) { +ConverterPtr Config::NewFromFile(const std::string& fileName) { ConfigInternal* impl = (ConfigInternal*)internal; - string prefixedFileName = impl->FindConfigFile(fileName); + std::string prefixedFileName = impl->FindConfigFile(fileName); std::ifstream ifs(UTF8Util::GetPlatformString(prefixedFileName)); - string content(std::istreambuf_iterator(ifs), - (std::istreambuf_iterator())); + std::string content(std::istreambuf_iterator(ifs), + (std::istreambuf_iterator())); #if defined(_WIN32) || defined(_WIN64) UTF8Util::ReplaceAll(prefixedFileName, "\\", "/"); #endif // if defined(_WIN32) || defined(_WIN64) size_t slashPos = prefixedFileName.rfind("/"); - string configDirectory = ""; - if (slashPos != string::npos) { + std::string configDirectory = ""; + if (slashPos != std::string::npos) { configDirectory = prefixedFileName.substr(0, slashPos) + "/"; } return NewFromString(content, configDirectory); } -ConverterPtr Config::NewFromString(const string& json, - const string& configDirectory) { +ConverterPtr Config::NewFromString(const std::string& json, + const std::string& configDirectory) { rapidjson::Document doc; doc.ParseInsitu<0>(const_cast(json.c_str())); @@ -241,7 +248,7 @@ ConverterPtr Config::NewFromString(const string& json, } // Optional: name - string name; + std::string name; if (doc.HasMember("name") && doc["name"].IsString()) { name = doc["name"].GetString(); } diff --git a/src/Config.hpp b/src/Config.hpp index 6c2f4cf..7a904ce 100644 --- a/src/Config.hpp +++ b/src/Config.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, 
Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,9 +31,10 @@ public: virtual ~Config(); - ConverterPtr NewFromString(const string& json, const string& configDirectory); + ConverterPtr NewFromString(const std::string& json, + const std::string& configDirectory); - ConverterPtr NewFromFile(const string& fileName); + ConverterPtr NewFromFile(const std::string& fileName); private: void* internal; diff --git a/src/ConfigTest.cpp b/src/ConfigTest.cpp index 021360f..369c9dd 100644 --- a/src/ConfigTest.cpp +++ b/src/ConfigTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,12 @@ * limitations under the License. */ +#include + #include "Config.hpp" #include "ConfigTestBase.hpp" #include "Converter.hpp" +#include "Exception.hpp" #include "TestUtilsUTF8.hpp" namespace opencc { @@ -33,12 +36,12 @@ protected: Config config; ConverterPtr converter; - const string input; - const string expected; + const std::string input; + const std::string expected; }; TEST_F(ConfigTest, Convert) { - const string& converted = converter->Convert(input); + const std::string& converted = converter->Convert(input); EXPECT_EQ(expected, converted); } @@ -50,9 +53,9 @@ TEST_F(ConfigTest, ConvertBuffer) { } TEST_F(ConfigTest, NonexistingPath) { - const string path = "/opencc/no/such/file/or/directory"; + const std::string path = "/opencc/no/such/file/or/directory"; try { - const ConverterPtr converter = config.NewFromFile(path); + const ConverterPtr _ = config.NewFromFile(path); } catch (FileNotFound& e) { EXPECT_EQ(path + " not found or not accessible.", e.what()); } @@ -60,11 +63,11 @@ TEST_F(ConfigTest, NonexistingPath) { TEST_F(ConfigTest, NewFromStringWitoutTrailingSlash) { std::ifstream ifs(CONFIG_TEST_PATH); - string 
content(std::istreambuf_iterator(ifs), - (std::istreambuf_iterator())); - string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test"; + std::string content(std::istreambuf_iterator(ifs), + (std::istreambuf_iterator())); + std::string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test"; - const ConverterPtr converter = + const ConverterPtr _ = config.NewFromString(content, pathWithoutTrailingSlash); } diff --git a/src/ConfigTestBase.hpp b/src/ConfigTestBase.hpp index 874c5b6..fc8bb74 100644 --- a/src/ConfigTestBase.hpp +++ b/src/ConfigTestBase.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ protected: : CONFIG_TEST_PATH(CMAKE_SOURCE_DIR "/test/config_test/config_test.json") {} - const string CONFIG_TEST_PATH; + const std::string CONFIG_TEST_PATH; }; } // namespace opencc diff --git a/src/Conversion.cpp b/src/Conversion.cpp index 089b321..87a5135 100644 --- a/src/Conversion.cpp +++ b/src/Conversion.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,7 +21,7 @@ using namespace opencc; -string Conversion::Convert(const char* phrase) const { +std::string Conversion::Convert(const char* phrase) const { std::ostringstream buffer; for (const char* pstr = phrase; *pstr != '\0';) { Optional matched = dict->MatchPrefix(pstr); @@ -38,7 +38,7 @@ string Conversion::Convert(const char* phrase) const { return buffer.str(); } -string Conversion::Convert(const string& phrase) const { +std::string Conversion::Convert(const std::string& phrase) const { return Convert(phrase.c_str()); } diff --git a/src/Conversion.hpp b/src/Conversion.hpp index a28111e..cf73a25 100644 --- a/src/Conversion.hpp +++ b/src/Conversion.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,10 +31,10 @@ public: Conversion(DictPtr _dict) : dict(_dict) {} // Convert single phrase - string Convert(const string& phrase) const; + std::string Convert(const std::string& phrase) const; // Convert single phrase - string Convert(const char* phrase) const; + std::string Convert(const char* phrase) const; // Convert segmented text SegmentsPtr Convert(const SegmentsPtr& input) const; diff --git a/src/ConversionChain.cpp b/src/ConversionChain.cpp index a430db6..2bb29e6 100644 --- a/src/ConversionChain.cpp +++ b/src/ConversionChain.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,12 +16,14 @@ * limitations under the License. 
*/ +#include + #include "ConversionChain.hpp" #include "Segments.hpp" using namespace opencc; -ConversionChain::ConversionChain(const list _conversions) +ConversionChain::ConversionChain(const std::list _conversions) : conversions(_conversions) {} SegmentsPtr ConversionChain::Convert(const SegmentsPtr& input) const { diff --git a/src/ConversionChain.hpp b/src/ConversionChain.hpp index 6cada7e..28853c6 100644 --- a/src/ConversionChain.hpp +++ b/src/ConversionChain.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,8 @@ #pragma once +#include + #include "Common.hpp" #include "Conversion.hpp" @@ -29,13 +31,13 @@ namespace opencc { */ class OPENCC_EXPORT ConversionChain { public: - ConversionChain(const list _conversions); + ConversionChain(const std::list _conversions); SegmentsPtr Convert(const SegmentsPtr& input) const; - const list GetConversions() const { return conversions; } + const std::list GetConversions() const { return conversions; } private: - const list conversions; + const std::list conversions; }; } // namespace opencc diff --git a/src/ConversionChainTest.cpp b/src/ConversionChainTest.cpp index 3750258..5b45359 100644 --- a/src/ConversionChainTest.cpp +++ b/src/ConversionChainTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,7 +35,7 @@ protected: const size_t length = expected->Length(); EXPECT_TRUE(length == actual->Length()); for (size_t i = 0; i < length; i++) { - EXPECT_EQ(string(expected->At(i)), string(actual->At(i))); + EXPECT_EQ(std::string(expected->At(i)), std::string(actual->At(i))); } } @@ -48,7 +48,7 @@ TEST_F(ConversionChainTest, Convert) { const DictPtr& dictVariants = CreateDictForTaiwanVariants(); const ConversionPtr& conversionVariants = ConversionPtr(new Conversion(dictVariants)); - const list conversions{conversion, conversionVariants}; + const std::list conversions{conversion, conversionVariants}; const ConversionChainPtr& conversionChain = ConversionChainPtr(new ConversionChain(conversions)); const SegmentsPtr& converted = diff --git a/src/ConversionTest.cpp b/src/ConversionTest.cpp index 2a69620..04a80a7 100644 --- a/src/ConversionTest.cpp +++ b/src/ConversionTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,17 +33,17 @@ protected: DictPtr dict; ConversionPtr conversion; - const string input; - const string expected; + const std::string input; + const std::string expected; }; TEST_F(ConversionTest, ConvertString) { - const string converted = conversion->Convert(input); + const std::string converted = conversion->Convert(input); EXPECT_EQ(expected, converted); } TEST_F(ConversionTest, ConvertCString) { - const string converted = conversion->Convert(input.c_str()); + const std::string converted = conversion->Convert(input.c_str()); EXPECT_EQ(expected, converted); } diff --git a/src/Converter.cpp b/src/Converter.cpp index 9030b26..209cff4 100644 --- a/src/Converter.cpp +++ b/src/Converter.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,20 +16,22 @@ * limitations under the License. */ -#include "Converter.hpp" +#include + #include "ConversionChain.hpp" +#include "Converter.hpp" #include "Segments.hpp" using namespace opencc; -string Converter::Convert(const string& text) const { +std::string Converter::Convert(const std::string& text) const { const SegmentsPtr& segments = segmentation->Segment(text); const SegmentsPtr& converted = conversionChain->Convert(segments); return converted->ToString(); } size_t Converter::Convert(const char* input, char* output) const { - const string& converted = Convert(input); + const std::string& converted = Convert(input); strcpy(output, converted.c_str()); return converted.length(); } diff --git a/src/Converter.hpp b/src/Converter.hpp index cdfe34e..9f1f3f9 100644 --- a/src/Converter.hpp +++ b/src/Converter.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in 
compliance with the License. @@ -28,12 +28,12 @@ namespace opencc { */ class OPENCC_EXPORT Converter { public: - Converter(const string& _name, SegmentationPtr _segmentation, + Converter(const std::string& _name, SegmentationPtr _segmentation, ConversionChainPtr _conversionChain) : name(_name), segmentation(_segmentation), conversionChain(_conversionChain) {} - string Convert(const string& text) const; + std::string Convert(const std::string& text) const; size_t Convert(const char* input, char* output) const; @@ -44,7 +44,7 @@ public: } private: - const string name; + const std::string name; const SegmentationPtr segmentation; const ConversionChainPtr conversionChain; }; diff --git a/src/DartsDict.cpp b/src/DartsDict.cpp index d7fb844..c91d771 100644 --- a/src/DartsDict.cpp +++ b/src/DartsDict.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2020 BYVoid + * Copyright 2010-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,11 @@ * limitations under the License. */ -#include "DartsDict.hpp" +#include +#include + #include "BinaryDict.hpp" +#include "DartsDict.hpp" #include "Lexicon.hpp" #include "darts.h" diff --git a/src/DartsDict.hpp b/src/DartsDict.hpp index 2c539a3..eacc1a8 100644 --- a/src/DartsDict.hpp +++ b/src/DartsDict.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/DartsDictTest.cpp b/src/DartsDictTest.cpp index 8c7f2c0..d50cae6 100644 --- a/src/DartsDictTest.cpp +++ b/src/DartsDictTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ protected: fileName("dict.ocd"){}; const DartsDictPtr dartsDict; - const string fileName; + const std::string fileName; }; TEST_F(DartsDictTest, DictTest) { TestDict(dartsDict); } diff --git a/src/Dict.cpp b/src/Dict.cpp index b1012a0..0e6f4e4 100644 --- a/src/Dict.cpp +++ b/src/Dict.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,15 +16,18 @@ * limitations under the License. */ +#include + #include "Dict.hpp" using namespace opencc; Optional Dict::MatchPrefix(const char* word, - size_t len) const { - string wordTrunc = UTF8Util::TruncateUTF8(word, KeyMaxLength()); - const char* wordTruncPtr = wordTrunc.c_str() + wordTrunc.length(); - for (long len = static_cast(wordTrunc.length()); len > 0;) { + size_t wordLen) const { + long len = static_cast((std::min)(KeyMaxLength(), wordLen)); + std::string wordTrunc = UTF8Util::TruncateUTF8(word, len); + const char* wordTruncPtr = wordTrunc.c_str() + len; + for (; len > 0;) { wordTrunc.resize(static_cast(len)); wordTruncPtr = wordTrunc.c_str() + len; const Optional& result = Match(wordTrunc.c_str()); @@ -36,12 +39,13 @@ Optional Dict::MatchPrefix(const char* word, return Optional::Null(); } -vector Dict::MatchAllPrefixes(const char* word, - size_t len) const { - vector matchedLengths; - string wordTrunc = UTF8Util::TruncateUTF8(word, KeyMaxLength()); - const char* wordTruncPtr = wordTrunc.c_str() + wordTrunc.length(); - for (long len 
= static_cast(wordTrunc.length()); len > 0; +std::vector Dict::MatchAllPrefixes(const char* word, + size_t wordLen) const { + std::vector matchedLengths; + long len = static_cast((std::min)(KeyMaxLength(), wordLen)); + std::string wordTrunc = UTF8Util::TruncateUTF8(word, len); + const char* wordTruncPtr = wordTrunc.c_str() + len; + for (; len > 0; len -= static_cast(UTF8Util::PrevCharLength(wordTruncPtr))) { wordTrunc.resize(static_cast(len)); wordTruncPtr = wordTrunc.c_str() + len; diff --git a/src/Dict.hpp b/src/Dict.hpp index 0a49f1b..461d6d2 100644 --- a/src/Dict.hpp +++ b/src/Dict.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2020 BYVoid + * Copyright 2010-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ public: /** * Matches a word exactly and returns the DictEntry or Optional::Null(). */ - Optional Match(const string& word) const { + Optional Match(const std::string& word) const { return Match(word.c_str(), word.length()); } @@ -52,7 +52,7 @@ public: /** * Matches the longest matched prefix of a word. */ - Optional MatchPrefix(const string& word) const { + Optional MatchPrefix(const std::string& word) const { return MatchPrefix(word.c_str(), word.length()); } @@ -61,13 +61,14 @@ public: * For example given a dictionary having "a", "an", "b", "ba", "ban", "bana", * all the matched prefixes of "banana" are "bana", "ban", "ba", "b". */ - virtual vector MatchAllPrefixes(const char* word, - size_t len) const; + virtual std::vector MatchAllPrefixes(const char* word, + size_t len) const; /** * Returns all matched prefixes of a word, sorted by the length (desc). 
*/ - vector MatchAllPrefixes(const string& word) const { + std::vector + MatchAllPrefixes(const std::string& word) const { return MatchAllPrefixes(word.c_str(), word.length()); } diff --git a/src/DictConverter.cpp b/src/DictConverter.cpp index 117836b..07cf324 100644 --- a/src/DictConverter.cpp +++ b/src/DictConverter.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2020 BYVoid + * Copyright 2010-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,8 @@ using namespace opencc; -DictPtr LoadDictionary(const string& format, const string& inputFileName) { +DictPtr LoadDictionary(const std::string& format, + const std::string& inputFileName) { if (format == "text") { return SerializableDict::NewFromFile(inputFileName); } else if (format == "ocd") { @@ -41,7 +42,7 @@ DictPtr LoadDictionary(const string& format, const string& inputFileName) { return nullptr; } -SerializableDictPtr ConvertDict(const string& format, const DictPtr dict) { +SerializableDictPtr ConvertDict(const std::string& format, const DictPtr dict) { if (format == "text") { return TextDict::NewFromDict(*dict.get()); } else if (format == "ocd") { @@ -57,8 +58,10 @@ SerializableDictPtr ConvertDict(const string& format, const DictPtr dict) { } namespace opencc { -void ConvertDictionary(const string inputFileName, const string outputFileName, - const string formatFrom, const string formatTo) { +void ConvertDictionary(const std::string inputFileName, + const std::string outputFileName, + const std::string formatFrom, + const std::string formatTo) { DictPtr dictFrom = LoadDictionary(formatFrom, inputFileName); SerializableDictPtr dictTo = ConvertDict(formatTo, dictFrom); dictTo->SerializeToFile(outputFileName); diff --git a/src/DictConverter.hpp b/src/DictConverter.hpp index 4efb51c..4bc4e8a 100644 --- a/src/DictConverter.hpp +++ b/src/DictConverter.hpp @@ -1,7 +1,7 @@ /* * Open 
Chinese Convert * - * Copyright 2010-2017 BYVoid + * Copyright 2010-2017 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,8 +25,8 @@ namespace opencc { * Converts a dictionary from a format to another. * @ingroup opencc_cpp_api */ -OPENCC_EXPORT void ConvertDictionary(const string inputFileName, - const string outputFileName, - const string formatFrom, - const string formatTo); +OPENCC_EXPORT void ConvertDictionary(const std::string inputFileName, + const std::string outputFileName, + const std::string formatFrom, + const std::string formatTo); } // namespace opencc diff --git a/src/DictEntry.cpp b/src/DictEntry.cpp index eb55f14..542d4ee 100644 --- a/src/DictEntry.cpp +++ b/src/DictEntry.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2020 BYVoid + * Copyright 2010-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ using namespace opencc; -string MultiValueDictEntry::ToString() const { +std::string MultiValueDictEntry::ToString() const { // TODO escape space size_t i = 0; size_t length = Values().size(); diff --git a/src/DictEntry.hpp b/src/DictEntry.hpp index 4370200..7b2babd 100644 --- a/src/DictEntry.hpp +++ b/src/DictEntry.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2020 BYVoid + * Copyright 2010-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,13 +33,13 @@ public: virtual std::string Key() const = 0; - virtual vector Values() const = 0; + virtual std::vector Values() const = 0; virtual std::string GetDefault() const = 0; virtual size_t NumValues() const = 0; - virtual string ToString() const = 0; + virtual std::string ToString() const = 0; size_t KeyLength() const { return Key().length(); } @@ -55,42 +55,46 @@ public: class OPENCC_EXPORT NoValueDictEntry : public DictEntry { public: - NoValueDictEntry(const string& _key) : key(_key) {} + NoValueDictEntry(const std::string& _key) : key(_key) {} virtual ~NoValueDictEntry() {} virtual std::string Key() const { return key; } - virtual vector Values() const { return vector(); } + virtual std::vector Values() const { + return std::vector(); + } virtual std::string GetDefault() const { return key; } virtual size_t NumValues() const { return 0; } - virtual string ToString() const { return key; } + virtual std::string ToString() const { return key; } private: - string key; + std::string key; }; class OPENCC_EXPORT SingleValueDictEntry : public DictEntry { public: virtual std::string Value() const = 0; - virtual vector Values() const { - return vector{Value()}; + virtual std::vector Values() const { + return std::vector{Value()}; } virtual std::string GetDefault() const { return Value(); } virtual size_t NumValues() const { return 1; } - virtual string ToString() const { return string(Key()) + "\t" + Value(); } + virtual std::string ToString() const { + return std::string(Key()) + "\t" + Value(); + } }; class OPENCC_EXPORT StrSingleValueDictEntry : public SingleValueDictEntry { public: - StrSingleValueDictEntry(const string& _key, const string& _value) + StrSingleValueDictEntry(const std::string& _key, const std::string& _value) : key(_key), value(_value) {} virtual ~StrSingleValueDictEntry() {} @@ -100,8 +104,8 @@ public: virtual std::string Value() const { return value; } private: - string key; - string value; + std::string key; + std::string value; }; 
class OPENCC_EXPORT MultiValueDictEntry : public DictEntry { @@ -114,12 +118,13 @@ public: } } - virtual string ToString() const; + virtual std::string ToString() const; }; class OPENCC_EXPORT StrMultiValueDictEntry : public MultiValueDictEntry { public: - StrMultiValueDictEntry(const string& _key, const vector& _values) + StrMultiValueDictEntry(const std::string& _key, + const std::vector& _values) : key(_key), values(_values) {} virtual ~StrMultiValueDictEntry() {} @@ -128,22 +133,25 @@ public: size_t NumValues() const { return values.size(); } - vector Values() const { return values; } + std::vector Values() const { return values; } private: - string key; - vector values; + std::string key; + std::vector values; }; class OPENCC_EXPORT DictEntryFactory { public: - static DictEntry* New(const string& key) { return new NoValueDictEntry(key); } + static DictEntry* New(const std::string& key) { + return new NoValueDictEntry(key); + } - static DictEntry* New(const string& key, const string& value) { + static DictEntry* New(const std::string& key, const std::string& value) { return new StrSingleValueDictEntry(key, value); } - static DictEntry* New(const string& key, const vector& values) { + static DictEntry* New(const std::string& key, + const std::vector& values) { if (values.size() == 0) { return New(key); } else if (values.size() == 1) { @@ -156,11 +164,9 @@ public: if (entry->NumValues() == 0) { return new NoValueDictEntry(entry->Key()); } else if (entry->NumValues() == 1) { - const auto svEntry = static_cast(entry); - return new StrSingleValueDictEntry(svEntry->Key(), svEntry->Value()); + return new StrSingleValueDictEntry(entry->Key(), entry->Values().front()); } else { - const auto mvEntry = static_cast(entry); - return new StrMultiValueDictEntry(mvEntry->Key(), mvEntry->Values()); + return new StrMultiValueDictEntry(entry->Key(), entry->Values()); } } }; diff --git a/src/DictGroup.cpp b/src/DictGroup.cpp index ec14ae3..4ca9e33 100644 --- a/src/DictGroup.cpp 
+++ b/src/DictGroup.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,13 +16,15 @@ * limitations under the License. */ +#include + #include "DictGroup.hpp" #include "Lexicon.hpp" #include "TextDict.hpp" using namespace opencc; -DictGroup::DictGroup(const list& _dicts) +DictGroup::DictGroup(const std::list& _dicts) : keyMaxLength(0), dicts(_dicts) {} DictGroup::~DictGroup() {} @@ -51,21 +53,22 @@ Optional DictGroup::MatchPrefix(const char* word, return Optional::Null(); } -vector DictGroup::MatchAllPrefixes(const char* word, - size_t len) const { +std::vector DictGroup::MatchAllPrefixes(const char* word, + size_t len) const { std::map matched; // Match all prefixes from all dictionaries for (const auto& dict : dicts) { - const vector& entries = dict->MatchAllPrefixes(word, len); + const std::vector& entries = + dict->MatchAllPrefixes(word, len); for (const auto& entry : entries) { - size_t len = entry->KeyLength(); + size_t entryLen = entry->KeyLength(); // If the current length has already result, skip - if (matched.find(len) == matched.end()) { - matched[len] = entry; + if (matched.find(entryLen) == matched.end()) { + matched[entryLen] = entry; } } } - vector matchedEntries; + std::vector matchedEntries; for (auto i = matched.rbegin(); i != matched.rend(); i++) { matchedEntries.push_back(i->second); } @@ -87,5 +90,5 @@ LexiconPtr DictGroup::GetLexicon() const { DictGroupPtr DictGroup::NewFromDict(const Dict& dict) { TextDictPtr newDict = TextDict::NewFromDict(dict); - return DictGroupPtr(new DictGroup(list{newDict})); + return DictGroupPtr(new DictGroup(std::list{newDict})); } diff --git a/src/DictGroup.hpp b/src/DictGroup.hpp index 3ce0dce..fd51d91 100644 --- a/src/DictGroup.hpp +++ b/src/DictGroup.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 
2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,8 @@ #pragma once +#include + #include "Common.hpp" #include "Dict.hpp" @@ -28,7 +30,7 @@ namespace opencc { */ class OPENCC_EXPORT DictGroup : public Dict { public: - DictGroup(const list& dicts); + DictGroup(const std::list& dicts); static DictGroupPtr NewFromDict(const Dict& dict); @@ -41,15 +43,15 @@ public: virtual Optional MatchPrefix(const char* word, size_t len) const; - virtual vector MatchAllPrefixes(const char* word, - size_t len) const; + virtual std::vector MatchAllPrefixes(const char* word, + size_t len) const; virtual LexiconPtr GetLexicon() const; - const list GetDicts() const { return dicts; } + const std::list GetDicts() const { return dicts; } private: const size_t keyMaxLength; - const list dicts; + const std::list dicts; }; } // namespace opencc diff --git a/src/DictGroupTest.cpp b/src/DictGroupTest.cpp index 37d01b7..7003506 100644 --- a/src/DictGroupTest.cpp +++ b/src/DictGroupTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,7 @@ TEST_F(DictGroupTest, SimpleGroupTest) { TEST_F(DictGroupTest, TaiwanPhraseGroupTest) { const DictGroupPtr dictGroup(new DictGroup( - list{CreateDictForPhrases(), CreateTaiwanPhraseDict()})); + std::list{CreateDictForPhrases(), CreateTaiwanPhraseDict()})); { const auto& entry = dictGroup->Dict::MatchPrefix(utf8("鼠标")); EXPECT_EQ(utf8("鼠標"), entry.Get()->GetDefault()); diff --git a/src/DictGroupTestBase.hpp b/src/DictGroupTestBase.hpp index 220b100..db33f73 100644 --- a/src/DictGroupTestBase.hpp +++ b/src/DictGroupTestBase.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ protected: DictPtr phrasesDict = CreateDictForPhrases(); DictPtr charactersDict = CreateDictForCharacters(); DictGroupPtr dictGroup( - new DictGroup(list{phrasesDict, charactersDict})); + new DictGroup(std::list{phrasesDict, charactersDict})); return dictGroup; } }; diff --git a/src/Exception.hpp b/src/Exception.hpp index 5487375..7875537 100644 --- a/src/Exception.hpp +++ b/src/Exception.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/Export.hpp b/src/Export.hpp index e511b8a..0015e88 100644 --- a/src/Export.hpp +++ b/src/Export.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/Lexicon.cpp b/src/Lexicon.cpp new file mode 100644 index 0000000..c3caf66 --- /dev/null +++ b/src/Lexicon.cpp @@ -0,0 +1,33 @@ +/* + * Open Chinese Convert + * + * Copyright 2020 Carbo Kuo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "Lexicon.hpp" +namespace opencc { + +void Lexicon::Sort() { + std::sort(entries.begin(), entries.end(), DictEntry::UPtrLessThan); +} + +bool Lexicon::IsSorted() { + return std::is_sorted(entries.begin(), entries.end(), + DictEntry::UPtrLessThan); +} + +} // namespace opencc diff --git a/src/Lexicon.hpp b/src/Lexicon.hpp index e54f2f9..83d1bc2 100644 --- a/src/Lexicon.hpp +++ b/src/Lexicon.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,7 +29,7 @@ namespace opencc { class OPENCC_EXPORT Lexicon { public: Lexicon() {} - Lexicon(vector> entries_) + Lexicon(std::vector> entries_) : entries(std::move(entries_)) {} Lexicon(const Lexicon&) = delete; Lexicon& operator=(const Lexicon&) = delete; @@ -41,28 +41,23 @@ public: entries.push_back(std::move(entry)); } - void Sort() { - std::sort(entries.begin(), entries.end(), DictEntry::UPtrLessThan); - } + void Sort(); - bool IsSorted() { - return std::is_sorted(entries.begin(), entries.end(), - DictEntry::UPtrLessThan); - } + bool IsSorted(); const DictEntry* At(size_t index) const { return entries.at(index).get(); } size_t Length() const { return entries.size(); } - vector>::const_iterator begin() const { + std::vector>::const_iterator begin() const { return entries.begin(); } - vector>::const_iterator end() const { + std::vector>::const_iterator end() const { return entries.end(); } private: - vector> entries; + std::vector> entries; }; } // namespace opencc diff --git a/src/MarisaDict.cpp b/src/MarisaDict.cpp index 58a806d..d01dd52 100644 --- a/src/MarisaDict.cpp +++ b/src/MarisaDict.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2020 BYVoid + * Copyright 2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,12 @@ * limitations under the License. 
*/ -#include "marisa.h" +#include +#include #include +#include "marisa.h" + #include "Lexicon.hpp" #include "MarisaDict.hpp" #include "SerializedValues.hpp" @@ -70,12 +73,12 @@ Optional MarisaDict::MatchPrefix(const char* word, } } -vector MarisaDict::MatchAllPrefixes(const char* word, - size_t len) const { +std::vector MarisaDict::MatchAllPrefixes(const char* word, + size_t len) const { const marisa::Trie& trie = *internal->marisa; marisa::Agent agent; agent.set_query(word, (std::min)(maxLength, len)); - vector matches; + std::vector matches; while (trie.common_prefix_search(agent)) { matches.push_back(lexicon->At(agent.key().id())); } @@ -103,7 +106,7 @@ MarisaDictPtr MarisaDict::NewFromFile(FILE* fp) { // Extract lexicon from built Marisa Trie, in order to get the order of keys. marisa::Agent agent; agent.set_query(""); - vector> entries; + std::vector> entries; entries.resize(values_lexicon->Length()); size_t maxLength = 0; while (dict->internal->marisa->predictive_search(agent)) { @@ -138,7 +141,7 @@ MarisaDictPtr MarisaDict::NewFromDict(const Dict& thatDict) { // Extract lexicon from built Marisa Trie, in order to get the order of keys. marisa::Agent agent; agent.set_query(""); - vector> entries; + std::vector> entries; entries.resize(thatLexicon->Length()); while (dict->internal->marisa->predictive_search(agent)) { std::string key(agent.key().ptr(), agent.key().length()); diff --git a/src/MarisaDict.hpp b/src/MarisaDict.hpp index e66dcd0..6917c23 100644 --- a/src/MarisaDict.hpp +++ b/src/MarisaDict.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2020 BYVoid + * Copyright 2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,8 +37,8 @@ public: virtual Optional MatchPrefix(const char* word, size_t len) const; - virtual vector MatchAllPrefixes(const char* word, - size_t len) const; + virtual std::vector MatchAllPrefixes(const char* word, + size_t len) const; virtual LexiconPtr GetLexicon() const; diff --git a/src/MarisaDictTest.cpp b/src/MarisaDictTest.cpp index dec91f2..f1c88a1 100644 --- a/src/MarisaDictTest.cpp +++ b/src/MarisaDictTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2020 BYVoid + * Copyright 2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ protected: : dict(MarisaDict::NewFromDict(*textDict)), fileName("dict.ocd2"){}; const MarisaDictPtr dict; - const string fileName; + const std::string fileName; }; TEST_F(MarisaDictTest, DictTest) { TestDict(dict); } diff --git a/src/MaxMatchSegmentation.cpp b/src/MaxMatchSegmentation.cpp index c08c6e6..5cdd79f 100644 --- a/src/MaxMatchSegmentation.cpp +++ b/src/MaxMatchSegmentation.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,7 +20,7 @@ using namespace opencc; -SegmentsPtr MaxMatchSegmentation::Segment(const string& text) const { +SegmentsPtr MaxMatchSegmentation::Segment(const std::string& text) const { SegmentsPtr segments(new Segments); const char* segStart = text.c_str(); size_t segLength = 0; diff --git a/src/MaxMatchSegmentation.hpp b/src/MaxMatchSegmentation.hpp index cd339f1..1ecc227 100644 --- a/src/MaxMatchSegmentation.hpp +++ b/src/MaxMatchSegmentation.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ public: virtual ~MaxMatchSegmentation() {} - virtual SegmentsPtr Segment(const string& text) const; + virtual SegmentsPtr Segment(const std::string& text) const; const DictPtr GetDict() const { return dict; } diff --git a/src/MaxMatchSegmentationTest.cpp b/src/MaxMatchSegmentationTest.cpp index e761c90..775c7ef 100644 --- a/src/MaxMatchSegmentationTest.cpp +++ b/src/MaxMatchSegmentationTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,10 +37,10 @@ protected: TEST_F(MaxMatchSegmentationTest, Segment) { const auto& segments = segmenter->Segment(utf8("太后的头发干燥")); EXPECT_EQ(4, segments->Length()); - EXPECT_EQ(utf8("太后"), string(segments->At(0))); - EXPECT_EQ(utf8("的"), string(segments->At(1))); - EXPECT_EQ(utf8("头发"), string(segments->At(2))); - EXPECT_EQ(utf8("干燥"), string(segments->At(3))); + EXPECT_EQ(utf8("太后"), std::string(segments->At(0))); + EXPECT_EQ(utf8("的"), std::string(segments->At(1))); + EXPECT_EQ(utf8("头发"), std::string(segments->At(2))); + EXPECT_EQ(utf8("干燥"), std::string(segments->At(3))); } } // namespace opencc diff --git a/src/Optional.hpp b/src/Optional.hpp index 9e4eb1b..8a510f4 100644 --- a/src/Optional.hpp +++ b/src/Optional.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/PhraseExtract.cpp b/src/PhraseExtract.cpp index 243c5ca..69c21bb 100644 --- a/src/PhraseExtract.cpp +++ b/src/PhraseExtract.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015-2020 BYVoid + * Copyright 2015-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ * limitations under the License. 
*/ +#include #include #include @@ -23,12 +24,16 @@ #include "PhraseExtract.hpp" +#ifdef _MSC_VER +#pragma execution_character_set("utf-8") +#endif + namespace opencc { namespace internal { bool ContainsPunctuation(const PhraseExtract::UTF8StringSlice8Bit& word) { - static const vector punctuations = { + static const std::vector punctuations = { " ", "\n", "\r", "\t", "-", ",", ".", "?", "!", "*", " ", ",", "。", "、", ";", ":", "?", "!", "…", "“", "”", "「", "」", "—", "-", "(", ")", "《", "》", ".", "/", "\"}; @@ -68,7 +73,7 @@ public: marisa_trie.clear(); } - const vector& Items() const { return items; } + const std::vector& Items() const { return items; } void Build() { BuildKeys(); @@ -96,7 +101,7 @@ private: void BuildTrie() { std::unordered_map key_item_id_map; marisa::Keyset keyset; - for (size_t i = 0; i < items.size(); i++) { + for (int i = 0; i < items.size(); i++) { const auto& key = items[i].first; key_item_id_map[key.ToString()] = i; keyset.push_back(key.CString(), key.ByteLength()); @@ -120,7 +125,7 @@ private: std::unordered_map dict; - vector items; + std::vector items; marisa::Trie marisa_trie; std::vector marisa_id_item_map; }; @@ -277,7 +282,7 @@ typedef std::unordered_map void CalculatePrefixSuffixEntropy( - const vector& presuffixes, + const std::vector& presuffixes, const PhraseExtract::LengthType setLength, const PhraseExtract::LengthType wordMinLength, const PhraseExtract::LengthType wordMaxLength, diff --git a/src/PhraseExtract.hpp b/src/PhraseExtract.hpp index 898e7ab..75a165d 100644 --- a/src/PhraseExtract.hpp +++ b/src/PhraseExtract.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #pragma once +#include #include #include "Common.hpp" @@ -35,7 +36,7 @@ public: virtual ~PhraseExtract(); - void Extract(const string& text) { + void Extract(const std::string& text) { SetFullText(text); ExtractSuffixes(); CalculateFrequency(); @@ -49,7 +50,7 @@ public: SelectWords(); } - void SetFullText(const string& fullText) { + void SetFullText(const std::string& fullText) { utf8FullText = UTF8StringSlice(fullText.c_str()); } @@ -88,13 +89,13 @@ public: postCalculationFilter = filter; } - void ReleaseSuffixes() { vector().swap(suffixes); } + void ReleaseSuffixes() { std::vector().swap(suffixes); } - void ReleasePrefixes() { vector().swap(prefixes); } + void ReleasePrefixes() { std::vector().swap(prefixes); } - const vector& Words() const { return words; } + const std::vector& Words() const { return words; } - const vector& WordCandidates() const { + const std::vector& WordCandidates() const { return wordCandidates; } @@ -182,10 +183,10 @@ private: UTF8StringSlice utf8FullText; size_t totalOccurrence; double logTotalOccurrence; - vector prefixes; - vector suffixes; - vector wordCandidates; - vector words; + std::vector prefixes; + std::vector suffixes; + std::vector wordCandidates; + std::vector words; DictType* signals; friend class PhraseExtractTest; diff --git a/src/PhraseExtractTest.cpp b/src/PhraseExtractTest.cpp index ed49330..decefac 100644 --- a/src/PhraseExtractTest.cpp +++ b/src/PhraseExtractTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,18 +32,18 @@ protected: : siShi(utf8("四是四十是十十四是十四四十是四十")), punctuation(utf8("一.二.三")) {} - const vector& Suffixes() const { + const std::vector& Suffixes() const { return phraseExtract.suffixes; } - const vector& Prefixes() const { + const std::vector& Prefixes() const { return phraseExtract.prefixes; } PhraseExtract phraseExtract; - const string siShi; - const string punctuation; + const std::string siShi; + const std::string punctuation; }; TEST_F(PhraseExtractTest, ExtractSuffixes) { @@ -53,7 +53,7 @@ TEST_F(PhraseExtractTest, ExtractSuffixes) { phraseExtract.SetFullText(siShi); phraseExtract.ExtractSuffixes(); EXPECT_EQ( - vector( + std::vector( {"十", "十十四是", "十四四十", "十四是十", "十是十十", "十是四十", "四十", "四十是十", "四十是四", "四四十是", "四是十四", "四是四十", "是十十四", "是十四四", "是四十", "是四十是"}), @@ -67,7 +67,7 @@ TEST_F(PhraseExtractTest, ExtractPrefixes) { phraseExtract.SetFullText(siShi); phraseExtract.ExtractPrefixes(); EXPECT_EQ( - vector( + std::vector( {"十是十十", "十四四十", "十是四十", "四是四十", "四十是十", "十四是十", "四", "是十十四", "四是十四", "是十四四", "四十是四", "四是四", "四四十是", "是四十是", "四是", "十十四是"}), @@ -100,7 +100,7 @@ TEST_F(PhraseExtractTest, ExtractWordCandidates) { phraseExtract.SetWordMaxLength(3); phraseExtract.SetFullText(siShi); phraseExtract.ExtractWordCandidates(); - EXPECT_EQ(vector( + EXPECT_EQ(std::vector( {"十", "四", "是", "四十", "十四", "十是", "四十是", "四是", "是十", "是四", "是四十", "十十", "十十四", "十四四", "十四是", "十是十", "十是四", "四四", @@ -162,10 +162,10 @@ TEST_F(PhraseExtractTest, SelectWords) { return phraseExtract.Frequency(word) == 1; }); phraseExtract.SelectWords(); - EXPECT_EQ( - vector({"十", "四", "是", "四十", "十四", "十是", - "四十是", "四是", "是十", "是四", "是四十"}), - phraseExtract.Words()); + EXPECT_EQ(std::vector({"十", "四", "是", "四十", "十四", + "十是", "四十是", "四是", "是十", + "是四", "是四十"}), + phraseExtract.Words()); } TEST_F(PhraseExtractTest, Punctuation) { @@ -175,7 +175,7 @@ TEST_F(PhraseExtractTest, Punctuation) { phraseExtract.SetFullText(punctuation); phraseExtract.ExtractPrefixes(); EXPECT_EQ( - 
vector({"一.", ".二.", "一", "二.三", "一.二"}), + std::vector({"一.", ".二.", "一", "二.三", "一.二"}), Prefixes()); } diff --git a/src/Segmentation.cpp b/src/Segmentation.cpp index fb31c8a..eea099d 100644 --- a/src/Segmentation.cpp +++ b/src/Segmentation.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/Segmentation.hpp b/src/Segmentation.hpp index 24c824c..c260ea6 100644 --- a/src/Segmentation.hpp +++ b/src/Segmentation.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,6 @@ namespace opencc { */ class OPENCC_EXPORT Segmentation { public: - virtual SegmentsPtr Segment(const string& text) const = 0; + virtual SegmentsPtr Segment(const std::string& text) const = 0; }; } // namespace opencc diff --git a/src/Segments.hpp b/src/Segments.hpp index d1d664d..21c3981 100644 --- a/src/Segments.hpp +++ b/src/Segments.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,8 @@ #pragma once +#include + #include "Common.hpp" namespace opencc { @@ -30,13 +32,13 @@ public: Segments() {} Segments(std::initializer_list initList) { - for (const string& item : initList) { + for (const std::string& item : initList) { AddSegment(item); } } - Segments(std::initializer_list initList) { - for (const string& item : initList) { + Segments(std::initializer_list initList) { + for (const std::string& item : initList) { AddSegment(item); } } @@ -46,7 +48,7 @@ public: unmanaged.push_back(unmanagedString); } - void AddSegment(const string& str) { + void AddSegment(const std::string& str) { indexes.push_back(std::make_pair(managed.size(), true)); managed.push_back(str); } @@ -91,7 +93,7 @@ public: iterator end() const { return iterator(this, indexes.size()); } - string ToString() const { + std::string ToString() const { // TODO implement a nested structure to reduce concatenation, // like a purely functional differential list std::ostringstream buffer; @@ -104,9 +106,9 @@ public: private: Segments(const Segments&) {} - vector unmanaged; - vector managed; + std::vector unmanaged; + std::vector managed; // index, managed - vector> indexes; + std::vector> indexes; }; } // namespace opencc diff --git a/src/SerializableDict.hpp b/src/SerializableDict.hpp index 42ca781..17cea89 100644 --- a/src/SerializableDict.hpp +++ b/src/SerializableDict.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ public: /** * Serializes the dictionary and writes in to a file. 
*/ - virtual void SerializeToFile(const string& fileName) const { + virtual void SerializeToFile(const std::string& fileName) const { FILE* fp = fopen(fileName.c_str(), "wb"); if (fp == NULL) { throw FileNotWritable(fileName); @@ -45,7 +45,7 @@ public: } template - static bool TryLoadFromFile(const string& fileName, + static bool TryLoadFromFile(const std::string& fileName, std::shared_ptr* dict) { FILE* fp = #ifdef _MSC_VER @@ -66,7 +66,7 @@ public: } template - static std::shared_ptr NewFromFile(const string& fileName) { + static std::shared_ptr NewFromFile(const std::string& fileName) { std::shared_ptr dict; if (!TryLoadFromFile(fileName, &dict)) { throw FileNotFound(fileName); diff --git a/src/SerializedValues.cpp b/src/SerializedValues.cpp index c9fee77..54f23e0 100644 --- a/src/SerializedValues.cpp +++ b/src/SerializedValues.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2020 BYVoid + * Copyright 2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,11 @@ * limitations under the License. 
*/ -#include "SerializedValues.hpp" +#include +#include + #include "Lexicon.hpp" +#include "SerializedValues.hpp" using namespace opencc; @@ -44,8 +47,8 @@ template void WriteInteger(FILE* fp, INT_TYPE num) { size_t SerializedValues::KeyMaxLength() const { return 0; } void SerializedValues::SerializeToFile(FILE* fp) const { - string valueBuf; - vector valueBytes; + std::string valueBuf; + std::vector valueBytes; uint32_t valueTotalLength = 0; ConstructBuffer(&valueBuf, &valueBytes, &valueTotalLength); // Number of items @@ -78,7 +81,7 @@ std::shared_ptr SerializedValues::NewFromFile(FILE* fp) { // Values uint32_t valueTotalLength = ReadInteger(fp); - string valueBuffer; + std::string valueBuffer; valueBuffer.resize(valueTotalLength); size_t unitsRead = fread(const_cast(valueBuffer.c_str()), sizeof(char), valueTotalLength, fp); @@ -92,7 +95,7 @@ std::shared_ptr SerializedValues::NewFromFile(FILE* fp) { // Number of values uint16_t numValues = ReadInteger(fp); // Value offset - vector values; + std::vector values; for (uint16_t j = 0; j < numValues; j++) { const char* value = pValueBuffer; uint16_t numValueBytes = ReadInteger(fp); @@ -106,15 +109,15 @@ std::shared_ptr SerializedValues::NewFromFile(FILE* fp) { return dict; } -void SerializedValues::ConstructBuffer(string* valueBuffer, - vector* valueBytes, +void SerializedValues::ConstructBuffer(std::string* valueBuffer, + std::vector* valueBytes, uint32_t* valueTotalLength) const { *valueTotalLength = 0; // Calculate total length. for (const std::unique_ptr& entry : *lexicon) { assert(entry->NumValues() != 0); for (const auto& value : entry->Values()) { - *valueTotalLength += value.length() + 1; + *valueTotalLength += static_cast(value.length()) + 1; } } // Write values to the buffer. 
diff --git a/src/SerializedValues.hpp b/src/SerializedValues.hpp index c75b488..c17210b 100644 --- a/src/SerializedValues.hpp +++ b/src/SerializedValues.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2020 BYVoid + * Copyright 2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,8 @@ public: private: LexiconPtr lexicon; - void ConstructBuffer(string* valueBuffer, vector* valueBytes, + void ConstructBuffer(std::string* valueBuffer, + std::vector* valueBytes, uint32_t* valueTotalLength) const; }; } // namespace opencc diff --git a/src/SerializedValuesTest.cpp b/src/SerializedValuesTest.cpp index 107ed4a..2525146 100644 --- a/src/SerializedValuesTest.cpp +++ b/src/SerializedValuesTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2020 BYVoid + * Copyright 2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ protected: fileName("dict.bin"){}; const std::shared_ptr binDict; - const string fileName; + const std::string fileName; }; TEST_F(SerializedValuesTest, Serialization) { diff --git a/src/SimpleConverter.cpp b/src/SimpleConverter.cpp index 0a223fc..417f7a5 100644 --- a/src/SimpleConverter.cpp +++ b/src/SimpleConverter.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,12 +56,12 @@ std::string SimpleConverter::Convert(const std::string& input) const { } std::string SimpleConverter::Convert(const char* input) const { - return Convert(string(input)); + return Convert(std::string(input)); } std::string SimpleConverter::Convert(const char* input, size_t length) const { if (length == static_cast(-1)) { - return Convert(string(input)); + return Convert(std::string(input)); } else { return Convert(UTF8Util::FromSubstr(input, length)); } @@ -81,12 +81,12 @@ size_t SimpleConverter::Convert(const char* input, size_t length, if (length == static_cast(-1)) { return Convert(input, output); } else { - string trimmed = UTF8Util::FromSubstr(input, length); + std::string trimmed = UTF8Util::FromSubstr(input, length); return Convert(trimmed.c_str(), output); } } -static string cError; +static std::string cError; opencc_t opencc_open_internal(const char* configFileName) { try { diff --git a/src/SimpleConverter.hpp b/src/SimpleConverter.hpp index eacf7c1..448b221 100644 --- a/src/SimpleConverter.hpp +++ b/src/SimpleConverter.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,21 +51,22 @@ public: /** * Converts a text - * @param input A C-Style string (terminated by '\0') to be converted. + * @param input A C-Style std::string (terminated by '\0') to be converted. */ std::string Convert(const char* input) const; /** * Converts a text - * @param input A C-Style string limited by a given length to be converted. - * @param length Maximal length in byte of the input string. + * @param input A C-Style std::string limited by a given length to be + * converted. + * @param length Maximal length in byte of the input std::string. 
*/ std::string Convert(const char* input, size_t length) const; /** * Converts a text and writes to an allocated buffer * Please make sure the buffer has sufficent space. - * @param input A C-Style string (terminated by '\0') to be converted. + * @param input A C-Style std::string (terminated by '\0') to be converted. * @param output Buffer to write the converted text. * @return Length of converted text. */ @@ -74,8 +75,9 @@ public: /** * Converts a text and writes to an allocated buffer * Please make sure the buffer has sufficent space. - * @param input A C-Style string limited by a given length to be converted. - * @param length Maximal length in byte of the input string. + * @param input A C-Style std::string limited by a given length to be + * converted. + * @param length Maximal length in byte of the input std::string. * @param output Buffer to write the converted text. * @return Length of converted text. */ diff --git a/src/SimpleConverterTest.cpp b/src/SimpleConverterTest.cpp index 261c388..9c645b7 100644 --- a/src/SimpleConverterTest.cpp +++ b/src/SimpleConverterTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,7 +31,7 @@ protected: void TestConverter(const std::string& config) const { const SimpleConverter converter(config); - const string& converted = + const std::string& converted = converter.Convert(utf8("燕燕于飞差池其羽之子于归远送于野")); EXPECT_EQ(utf8("燕燕于飛差池其羽之子于歸遠送於野"), converted); } @@ -51,8 +51,8 @@ TEST_F(SimpleConverterTest, Multithreading) { } TEST_F(SimpleConverterTest, CInterface) { - const string& text = utf8("燕燕于飞差池其羽之子于归远送于野"); - const string& expected = utf8("燕燕于飛差池其羽之子于歸遠送於野"); + const std::string& text = utf8("燕燕于飞差池其羽之子于归远送于野"); + const std::string& expected = utf8("燕燕于飛差池其羽之子于歸遠送於野"); { opencc_t od = opencc_open(CONFIG_TEST_PATH.c_str()); char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1); @@ -70,7 +70,7 @@ TEST_F(SimpleConverterTest, CInterface) { EXPECT_EQ(0, opencc_close(od)); } { - string path = "/opencc/no/such/file/or/directory"; + std::string path = "/opencc/no/such/file/or/directory"; opencc_t od = opencc_open(path.c_str()); EXPECT_EQ(reinterpret_cast(-1), od); EXPECT_EQ(path + " not found or not accessible.", opencc_error()); diff --git a/src/TestUtils.hpp b/src/TestUtils.hpp index fd7c166..46ab90d 100644 --- a/src/TestUtils.hpp +++ b/src/TestUtils.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/TestUtilsUTF8.hpp b/src/TestUtilsUTF8.hpp index 37f01b7..6378405 100644 --- a/src/TestUtilsUTF8.hpp +++ b/src/TestUtilsUTF8.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2020 BYVoid + * Copyright 2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,24 +22,6 @@ namespace opencc { -#if defined(_MSC_VER) && _MSC_VER > 1310 -// Visual C++ 2005 and later require the source files in UTF-8, and all strings -// to be encoded as wchar_t otherwise the strings will be converted into the -// local multibyte encoding and cause errors. To use a wchar_t as UTF-8, these -// strings then need to be convert back to UTF-8. This function is just a rough -// example of how to do this. -#include -#define utf8(str) ConvertToUTF8(L##str) -std::string ConvertToUTF8(const wchar_t* pStr) { - static char szBuf[1024]; - WideCharToMultiByte(CP_UTF8, 0, pStr, -1, szBuf, sizeof(szBuf), NULL, NULL); - return szBuf; -} - -#else // if defined(_MSC_VER) && _MSC_VER > 1310 -// Visual C++ 2003 and gcc will use the string literals as is, so the files -// should be saved as UTF-8. gcc requires the files to not have a UTF-8 BOM. #define utf8(str) std::string(str) -#endif // if defined(_MSC_VER) && _MSC_VER > 1310 } // namespace opencc diff --git a/src/TextDict.cpp b/src/TextDict.cpp index f337717..e082112 100644 --- a/src/TextDict.cpp +++ b/src/TextDict.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2020 BYVoid + * Copyright 2010-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,11 @@ * limitations under the License. 
*/ -#include "TextDict.hpp" +#include +#include + #include "Lexicon.hpp" +#include "TextDict.hpp" using namespace opencc; @@ -34,16 +37,17 @@ static DictEntry* ParseKeyValues(const char* buff, size_t lineNum) { size_t length; const char* pbuff = UTF8Util::FindNextInline(buff, '\t'); if (UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { - throw InvalidTextDictionary("Tabular not found " + string(buff), lineNum); + throw InvalidTextDictionary("Tabular not found " + std::string(buff), + lineNum); } length = static_cast(pbuff - buff); - string key = UTF8Util::FromSubstr(buff, length); - vector values; + std::string key = UTF8Util::FromSubstr(buff, length); + std::vector values; while (!UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { buff = pbuff = UTF8Util::NextChar(pbuff); pbuff = UTF8Util::FindNextInline(buff, ' '); length = static_cast(pbuff - buff); - const string& value = UTF8Util::FromSubstr(buff, length); + const std::string& value = UTF8Util::FromSubstr(buff, length); values.push_back(value); } if (values.size() == 0) { @@ -93,7 +97,8 @@ TextDictPtr TextDict::NewFromDict(const Dict& dict) { size_t TextDict::KeyMaxLength() const { return maxLength; } Optional TextDict::Match(const char* word, size_t len) const { - std::unique_ptr entry(new NoValueDictEntry(word)); + std::unique_ptr entry( + new NoValueDictEntry(std::string(word, len))); const auto& found = std::lower_bound(lexicon->begin(), lexicon->end(), entry, DictEntry::UPtrLessThan); if ((found != lexicon->end()) && ((*found)->Key() == entry->Key())) { diff --git a/src/TextDict.hpp b/src/TextDict.hpp index faf57e0..f1cb67d 100644 --- a/src/TextDict.hpp +++ b/src/TextDict.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2020 BYVoid + * Copyright 2010-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/TextDictTest.cpp b/src/TextDictTest.cpp index 8fbb1c0..2d1455b 100644 --- a/src/TextDictTest.cpp +++ b/src/TextDictTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ class TextDictTest : public TextDictTestBase { protected: TextDictTest() : fileName("dict.txt"){}; - const string fileName; + const std::string fileName; }; TEST_F(TextDictTest, DictTest) { TestDict(textDict); } diff --git a/src/TextDictTestBase.hpp b/src/TextDictTestBase.hpp index 34d270f..e376ffc 100644 --- a/src/TextDictTestBase.hpp +++ b/src/TextDictTestBase.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015-2020 BYVoid + * Copyright 2015-2020 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -43,14 +43,15 @@ protected: DictPtr CreateDictForCharacters() const { LexiconPtr lexicon(new Lexicon); - lexicon->Add(DictEntryFactory::New(utf8("后"), - vector{utf8("后"), utf8("後")})); - lexicon->Add(DictEntryFactory::New(utf8("发"), - vector{utf8("發"), utf8("髮")})); lexicon->Add(DictEntryFactory::New( - utf8("干"), vector{utf8("幹"), utf8("乾"), utf8("干")})); - lexicon->Add(DictEntryFactory::New(utf8("里"), - vector{utf8("裏"), utf8("里")})); + utf8("后"), std::vector{utf8("后"), utf8("後")})); + lexicon->Add(DictEntryFactory::New( + utf8("发"), std::vector{utf8("發"), utf8("髮")})); + lexicon->Add(DictEntryFactory::New( + utf8("干"), + std::vector{utf8("幹"), utf8("乾"), utf8("干")})); + lexicon->Add(DictEntryFactory::New( + utf8("里"), std::vector{utf8("裏"), utf8("里")})); lexicon->Sort(); return TextDictPtr(new TextDict(lexicon)); } @@ -68,8 +69,7 @@ protected: DictPtr CreateDictForTaiwanVariants() const { LexiconPtr lexicon(new Lexicon); lexicon->Add(DictEntryFactory::New(utf8("裏"), utf8("裡"))); - TextDictPtr textDict(new TextDict(lexicon)); - return textDict; + return TextDictPtr(new TextDict(lexicon)); } DictPtr CreateTaiwanPhraseDict() const { @@ -108,7 +108,7 @@ protected: EXPECT_EQ(utf8("BYVoid"), entry.Get()->Key()); EXPECT_EQ(utf8("byv"), entry.Get()->GetDefault()); - entry = dict->MatchPrefix("清華大學"); + entry = dict->MatchPrefix(utf8("清華大學")); EXPECT_TRUE(!entry.IsNull()); EXPECT_EQ(utf8("清華大學"), entry.Get()->Key()); EXPECT_EQ(utf8("TsinghuaUniversity"), entry.Get()->GetDefault()); @@ -131,7 +131,7 @@ protected: } void TestMatchAllPrefixes(const DictPtr& dict) const { - const vector matches = + const std::vector matches = dict->MatchAllPrefixes(utf8("清華大學計算機系")); EXPECT_EQ(3, matches.size()); EXPECT_EQ(utf8("清華大學"), matches.at(0)->Key()); diff --git a/src/UTF8StringSlice.cpp b/src/UTF8StringSlice.cpp index 51845b8..f55a069 100644 --- a/src/UTF8StringSlice.cpp +++ b/src/UTF8StringSlice.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 
2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/UTF8StringSlice.hpp b/src/UTF8StringSlice.hpp index e57dc69..0c9dfdc 100644 --- a/src/UTF8StringSlice.hpp +++ b/src/UTF8StringSlice.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ * limitations under the License. */ +#include + #include "Common.hpp" #include "UTF8Util.hpp" @@ -105,7 +107,7 @@ public: } } - string ToString() const { return string(str, str + byteLength); } + std::string ToString() const { return std::string(str, str + byteLength); } const char* CString() const { return str; } diff --git a/src/UTF8StringSliceTest.cpp b/src/UTF8StringSliceTest.cpp index 1adbc02..c3fcc91 100644 --- a/src/UTF8StringSliceTest.cpp +++ b/src/UTF8StringSliceTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,7 +26,7 @@ protected: UTF8StringSliceTest() : text("天行健,君子以自強不息。地勢坤,君子以厚德載物。"), empty(""){}; - const UTF8StringSlice text; + UTF8StringSlice text; const UTF8StringSlice empty; }; @@ -62,7 +62,6 @@ TEST_F(UTF8StringSliceTest, Compare) { } TEST_F(UTF8StringSliceTest, MoveRight) { - UTF8StringSlice text = this->text; text.MoveRight(); EXPECT_EQ(UTF8StringSlice("行健,君子以自強不息。地勢坤,君子以厚德載物。"), text); @@ -75,7 +74,6 @@ TEST_F(UTF8StringSliceTest, MoveRight) { } TEST_F(UTF8StringSliceTest, MoveLeft) { - UTF8StringSlice text = this->text; text.MoveLeft(); EXPECT_EQ(UTF8StringSlice("天行健,君子以自強不息。地勢坤,君子以厚德載物"), text); diff --git a/src/UTF8Util.cpp b/src/UTF8Util.cpp index 2a0a4cc..41888fd 100644 --- a/src/UTF8Util.cpp +++ b/src/UTF8Util.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/UTF8Util.hpp b/src/UTF8Util.hpp index d49a7a2..f1b4b14 100644 --- a/src/UTF8Util.hpp +++ b/src/UTF8Util.hpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2013 BYVoid + * Copyright 2013 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,11 +24,14 @@ #undef NOMINMAX #endif // _MSC_VER +#include + #include "Common.hpp" +#include "Exception.hpp" namespace opencc { /** - * UTF8 string utilities + * UTF8 std::string utilities * @ingroup opencc_cpp_api */ class OPENCC_EXPORT UTF8Util { @@ -117,7 +120,7 @@ public: } /** - * Returns the UTF8 length of a valid UTF8 string. + * Returns the UTF8 length of a valid UTF8 std::string. */ static size_t Length(const char* str) { size_t length = 0; @@ -149,17 +152,18 @@ public: } /** - * Copies a substring with given length to a new std::string. + * Copies a substd::string with given length to a new std::string. 
*/ - static string FromSubstr(const char* str, size_t length) { - string newStr; + static std::string FromSubstr(const char* str, size_t length) { + std::string newStr; newStr.resize(length); strncpy(const_cast(newStr.c_str()), str, length); return newStr; } /** - * Returns true if the given string is longer or as long as the given length. + * Returns true if the given std::string is longer or as long as the given + * length. */ static bool NotShorterThan(const char* str, size_t byteLength) { while (byteLength > 0) { @@ -173,11 +177,11 @@ public: } /** - * Truncates a string with a maximal length in byte. + * Truncates a std::string with a maximal length in byte. * No UTF8 character will be broken. */ - static string TruncateUTF8(const char* str, size_t maxByteLength) { - string wordTrunc; + static std::string TruncateUTF8(const char* str, size_t maxByteLength) { + std::string wordTrunc; if (NotShorterThan(str, maxByteLength)) { size_t len = 0; const char* pStr = str; @@ -197,22 +201,23 @@ public: } /** - * Replaces all patterns in a string in place. + * Replaces all patterns in a std::string in place. */ - static void ReplaceAll(string& str, const char* from, const char* to) { - string::size_type pos = 0; - string::size_type fromLen = strlen(from); - string::size_type toLen = strlen(to); - while ((pos = str.find(from, pos)) != string::npos) { + static void ReplaceAll(std::string& str, const char* from, const char* to) { + std::string::size_type pos = 0; + std::string::size_type fromLen = strlen(from); + std::string::size_type toLen = strlen(to); + while ((pos = str.find(from, pos)) != std::string::npos) { str.replace(pos, fromLen, to); pos += toLen; } } /** - * Joins a string vector in to a string with a separator. + * Joins a std::string vector in to a std::string with a separator. 
*/ - static string Join(const vector& strings, const string& separator) { + static std::string Join(const std::vector& strings, + const std::string& separator) { std::ostringstream buffer; bool first = true; for (const auto& str : strings) { @@ -226,9 +231,9 @@ public: } /** - * Joins a string vector in to a string. + * Joins a std::string vector in to a std::string. */ - static string Join(const vector& strings) { + static std::string Join(const std::vector& strings) { std::ostringstream buffer; for (const auto& str : strings) { buffer << str; @@ -237,7 +242,7 @@ public: } static void GetByteMap(const char* str, const size_t utf8Length, - vector* byteMap) { + std::vector* byteMap) { if (byteMap->size() < utf8Length) { byteMap->resize(utf8Length); } diff --git a/src/UTF8UtilTest.cpp b/src/UTF8UtilTest.cpp index 1be7830..c1516d5 100644 --- a/src/UTF8UtilTest.cpp +++ b/src/UTF8UtilTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,9 +70,9 @@ TEST_F(UTF8UtilTest, TruncateUTF8) { } TEST_F(UTF8UtilTest, GetByteMap) { - vector byteMap; + std::vector byteMap; UTF8Util::GetByteMap(text, 6, &byteMap); - EXPECT_EQ(vector({0, 3, 6, 9, 12, 16}), byteMap); + EXPECT_EQ(std::vector({0, 3, 6, 9, 12, 16}), byteMap); } } // namespace opencc diff --git a/src/benchmark/CMakeLists.txt b/src/benchmark/CMakeLists.txt index a0605d8..a3f1ae7 100644 --- a/src/benchmark/CMakeLists.txt +++ b/src/benchmark/CMakeLists.txt @@ -2,4 +2,18 @@ include_directories(..) 
add_executable(performance Performance.cpp) target_link_libraries(performance benchmark libopencc) -add_test(performance performance) +add_test(BenchmarkTest performance) + +if (WIN32) + add_custom_target( + copy_benchmark + ${CMAKE_COMMAND} -E copy $ ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Copy benchmark" + ) + add_custom_target( + copy_opencc + ${CMAKE_COMMAND} -E copy $ ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Copy opencc" + ) + add_dependencies(performance copy_benchmark copy_opencc) +endif() diff --git a/src/benchmark/Performance.cpp b/src/benchmark/Performance.cpp index f291f26..f0e6335 100644 --- a/src/benchmark/Performance.cpp +++ b/src/benchmark/Performance.cpp @@ -3,7 +3,12 @@ #include #include #include + +#ifdef _MSC_VER +#include +#else #include +#endif #include "SimpleConverter.hpp" #include "TestUtilsUTF8.hpp" diff --git a/src/opencc.h b/src/opencc.h index 031407e..bc3a9e9 100644 --- a/src/opencc.h +++ b/src/opencc.h @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -94,16 +94,16 @@ OPENCC_EXPORT opencc_t opencc_open_w(const wchar_t* configFileName); OPENCC_EXPORT int opencc_close(opencc_t opencc); /** - * Converts UTF-8 string + * Converts UTF-8 std::string * * @param opencc The opencc description pointer. - * @param input The UTF-8 encoded string. + * @param input The UTF-8 encoded std::string. * @param length The maximum length in byte to convert. If length is (size_t)-1, - * the whole string (terminated by '\0') will be converted. + * the whole std::string (terminated by '\0') will be converted. * @param output The buffer to store converted text. You MUST make sure this * buffer has sufficient space. * - * @return The length of converted string or (size_t)-1 on error. + * @return The length of converted std::string or (size_t)-1 on error. 
* * @ingroup opencc_c_api */ @@ -112,18 +112,18 @@ OPENCC_EXPORT size_t opencc_convert_utf8_to_buffer(opencc_t opencc, size_t length, char* output); /** - * Converts UTF-8 string - * This function returns an allocated C-Style string, which stores - * the converted string. + * Converts UTF-8 std::string + * This function returns an allocated C-Style std::string, which stores + * the converted std::string. * You MUST call opencc_convert_utf8_free() to release allocated memory. * * @param opencc The opencc description pointer. - * @param input The UTF-8 encoded string. + * @param input The UTF-8 encoded std::string. * @param length The maximum length in byte to convert. If length is (size_t)-1, - * the whole string (terminated by '\0') will be converted. + * the whole std::string (terminated by '\0') will be converted. * - * @return The newly allocated UTF-8 string that stores text converted, - * or NULL on error. + * @return The newly allocated UTF-8 std::string that stores text + * converted, or NULL on error. * @ingroup opencc_c_api */ OPENCC_EXPORT char* opencc_convert_utf8(opencc_t opencc, const char* input, @@ -132,7 +132,8 @@ OPENCC_EXPORT char* opencc_convert_utf8(opencc_t opencc, const char* input, /** * Releases allocated buffer by opencc_convert_utf8 * - * @param str Pointer to the allocated string buffer by opencc_convert_utf8. + * @param str Pointer to the allocated std::string buffer by + * opencc_convert_utf8. * * @ingroup opencc_c_api */ diff --git a/src/tools/CommandLine.cpp b/src/tools/CommandLine.cpp index ee669ed..5ba3b99 100644 --- a/src/tools/CommandLine.cpp +++ b/src/tools/CommandLine.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ * limitations under the License. 
*/ +#include + #include "CmdLineOutput.hpp" #include "Config.hpp" #include "Converter.hpp" @@ -23,9 +25,9 @@ using namespace opencc; -Optional inputFileName = Optional::Null(); -Optional outputFileName = Optional::Null(); -string configFileName; +Optional inputFileName = Optional::Null(); +Optional outputFileName = Optional::Null(); +std::string configFileName; bool noFlush; Config config; ConverterPtr converter; @@ -52,9 +54,9 @@ void ConvertLineByLine() { } else { isFirstLine = false; } - string line; + std::string line; std::getline(inputStream, line); - const string& converted = converter->Convert(line); + const std::string& converted = converter->Convert(line); fputs(converted.c_str(), fout); if (!noFlush) { // Flush every line if the output stream is stdout. @@ -64,10 +66,10 @@ void ConvertLineByLine() { fclose(fout); } -void Convert(string inputFileName) { +void Convert(std::string fileName) { const int BUFFER_SIZE = 1024 * 1024; static bool bufferInitialized = false; - static string buffer; + static std::string buffer; static char* bufferBegin; static const char* bufferEnd; static char* bufferPtr; @@ -82,27 +84,27 @@ void Convert(string inputFileName) { } bool needToRemove = false; - if (!outputFileName.IsNull() && inputFileName == outputFileName.Get()) { + if (!outputFileName.IsNull() && fileName == outputFileName.Get()) { // Special case: input == output - const string tempFileName = std::tmpnam(nullptr); - std::ifstream src(inputFileName, std::ios::binary); + const std::string tempFileName = std::tmpnam(nullptr); + std::ifstream src(fileName, std::ios::binary); std::ofstream dst(tempFileName, std::ios::binary); dst << src.rdbuf(); dst.close(); - inputFileName = tempFileName; + fileName = tempFileName; needToRemove = true; } - FILE* fin = fopen(inputFileName.c_str(), "r"); + FILE* fin = fopen(fileName.c_str(), "r"); if (!fin) { - throw FileNotFound(inputFileName); + throw FileNotFound(fileName); } FILE* fout = GetOutputStream(); while (!feof(fin)) { 
size_t length = fread(bufferPtr, sizeof(char), bufferSizeAvailble, fin); bufferPtr[length] = '\0'; size_t remainingLength = 0; - string remainingTemp; + std::string remainingTemp; if (length == bufferSizeAvailble) { // fread may breaks UTF8 character // Find the end of last character @@ -121,7 +123,7 @@ void Convert(string inputFileName) { } } // Perform conversion - const string& converted = converter->Convert(buffer); + const std::string& converted = converter->Convert(buffer); fputs(converted.c_str(), fout); if (!noFlush) { // Flush every line if the output stream is stdout. @@ -137,7 +139,7 @@ void Convert(string inputFileName) { fclose(fout); if (needToRemove) { // Remove temporary file. - std::remove(inputFileName.c_str()); + std::remove(fileName.c_str()); } } @@ -148,13 +150,13 @@ int main(int argc, const char* argv[]) { CmdLineOutput cmdLineOutput; cmd.setOutput(&cmdLineOutput); - TCLAP::ValueArg configArg( + TCLAP::ValueArg configArg( "c", "config", "Configuration file", false /* required */, "s2t.json" /* default */, "file" /* type */, cmd); - TCLAP::ValueArg outputArg( + TCLAP::ValueArg outputArg( "o", "output", "Write converted text to .", false /* required */, "" /* default */, "file" /* type */, cmd); - TCLAP::ValueArg inputArg( + TCLAP::ValueArg inputArg( "i", "input", "Read original text from .", false /* required */, "" /* default */, "file" /* type */, cmd); TCLAP::ValueArg noFlushArg( @@ -164,10 +166,10 @@ int main(int argc, const char* argv[]) { configFileName = configArg.getValue(); noFlush = noFlushArg.getValue(); if (inputArg.isSet()) { - inputFileName = Optional(inputArg.getValue()); + inputFileName = Optional(inputArg.getValue()); } if (outputArg.isSet()) { - outputFileName = Optional(outputArg.getValue()); + outputFileName = Optional(outputArg.getValue()); noFlush = true; } converter = config.NewFromFile(configFileName); diff --git a/src/tools/DictConverter.cpp b/src/tools/DictConverter.cpp index ef91745..bb5ea8f 100644 --- 
a/src/tools/DictConverter.cpp +++ b/src/tools/DictConverter.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2010-2014 BYVoid + * Copyright 2010-2014 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include "DictConverter.hpp" #include "CmdLineOutput.hpp" +#include "Exception.hpp" using namespace opencc; @@ -28,21 +29,21 @@ int main(int argc, const char* argv[]) { CmdLineOutput cmdLineOutput; cmd.setOutput(&cmdLineOutput); - vector dictFormats{"text", "ocd2", "ocd"}; - TCLAP::ValuesConstraint allowedVals(dictFormats); + std::vector dictFormats{"text", "ocd2", "ocd"}; + TCLAP::ValuesConstraint allowedVals(dictFormats); - TCLAP::ValueArg toArg("t", "to", "Output format", - true /* required */, "" /* default */, - &allowedVals /* type */, cmd); - TCLAP::ValueArg fromArg("f", "from", "Input format", - true /* required */, "" /* default */, - &allowedVals /* type */, cmd); - TCLAP::ValueArg outputArg( + TCLAP::ValueArg toArg("t", "to", "Output format", + true /* required */, "" /* default */, + &allowedVals /* type */, cmd); + TCLAP::ValueArg fromArg("f", "from", "Input format", + true /* required */, "" /* default */, + &allowedVals /* type */, cmd); + TCLAP::ValueArg outputArg( "o", "output", "Path to output dictionary", true /* required */, "" /* default */, "file" /* type */, cmd); - TCLAP::ValueArg inputArg("i", "input", "Path to input dictionary", - true /* required */, "" /* default */, - "file" /* type */, cmd); + TCLAP::ValueArg inputArg( + "i", "input", "Path to input dictionary", true /* required */, + "" /* default */, "file" /* type */, cmd); cmd.parse(argc, argv); ConvertDictionary(inputArg.getValue(), outputArg.getValue(), fromArg.getValue(), toArg.getValue()); diff --git a/src/tools/PhraseExtract.cpp b/src/tools/PhraseExtract.cpp index f0710aa..eddb18d 100644 --- a/src/tools/PhraseExtract.cpp +++ b/src/tools/PhraseExtract.cpp @@ 
-1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,22 +16,25 @@ * limitations under the License. */ -#include "PhraseExtract.hpp" +#include + #include "CmdLineOutput.hpp" +#include "PhraseExtract.hpp" using opencc::Exception; using opencc::PhraseExtract; using opencc::UTF8StringSlice; -void Extract(const vector& inputFiles, const string& outputFile) { +void Extract(const std::vector& inputFiles, + const std::string& outputFile) { std::ostringstream buffer; for (const auto& inputFile : inputFiles) { std::ifstream ifs(inputFile); - const string contents((std::istreambuf_iterator(ifs)), - (std::istreambuf_iterator())); + const std::string contents((std::istreambuf_iterator(ifs)), + (std::istreambuf_iterator())); buffer << contents; } - const string& text = buffer.str(); + const std::string& text = buffer.str(); PhraseExtract extractor; extractor.SetWordMaxLength(2); extractor.SetPrefixSetLength(1); @@ -55,12 +58,12 @@ int main(int argc, const char* argv[]) { VERSION); CmdLineOutput cmdLineOutput; cmd.setOutput(&cmdLineOutput); - TCLAP::UnlabeledMultiArg fileNames("fileName", "Input files", - true /* required */, "files"); + TCLAP::UnlabeledMultiArg fileNames( + "fileName", "Input files", true /* required */, "files"); cmd.add(fileNames); - TCLAP::ValueArg outputArg("o", "output", "Output file", - true /* required */, "" /* default */, - "file" /* type */, cmd); + TCLAP::ValueArg outputArg( + "o", "output", "Output file", true /* required */, "" /* default */, + "file" /* type */, cmd); cmd.parse(argc, argv); Extract(fileNames.getValue(), outputArg.getValue()); } catch (TCLAP::ArgException& e) { diff --git a/test.cmd b/test.cmd new file mode 100644 index 0000000..62b82e1 --- /dev/null +++ b/test.cmd @@ -0,0 +1,4 @@ +cmake -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. 
-DENABLE_GTEST:BOOL=ON -DCMAKE_BUILD_TYPE=Debug +cmake --build build --config Debug --target install +cd build +ctest --verbose -C Debug diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bdba2a7..d365960 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,6 +14,19 @@ foreach (CONFIG_TEST_FILE ${CONFIG_TEST}) endforeach (CONFIG_TEST_FILE) if (ENABLE_GTEST) + if (WIN32) + add_custom_target( + copy_gtest_to_test + ${CMAKE_COMMAND} -E copy $ ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Copy gtest" + ) + add_custom_target( + copy_gtest_main_to_test + ${CMAKE_COMMAND} -E copy $ ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Copy gtest_main" + ) + endif() + include_directories(../deps/gtest-1.7.0/include) set(UNITTESTS CommandLineConvertTest @@ -22,5 +35,8 @@ if (ENABLE_GTEST) add_executable(${UNITTEST} ${UNITTEST}.cpp) target_link_libraries(${UNITTEST} gtest gtest_main libopencc) add_test(${UNITTEST} ${UNITTEST}) + if (WIN32) + add_dependencies(${UNITTEST} copy_gtest_to_test copy_gtest_main_to_test) + endif() endforeach(UNITTEST) endif() diff --git a/test/CommandLineConvertTest.cpp b/test/CommandLineConvertTest.cpp index 79026a0..39ed1d5 100644 --- a/test/CommandLineConvertTest.cpp +++ b/test/CommandLineConvertTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 BYVoid + * Copyright 2015 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ * limitations under the License. 
*/ +#include + #include "Common.hpp" #include "gtest/gtest.h" @@ -35,11 +37,11 @@ protected: virtual void TearDown() { ASSERT_EQ(0, chdir(originalWorkingDirectory)); } - string GetFileContents(const string& fileName) const { + std::string GetFileContents(const std::string& fileName) const { std::ifstream fs(fileName); EXPECT_TRUE(fs.is_open()); - const string content((std::istreambuf_iterator(fs)), - (std::istreambuf_iterator())); + const std::string content((std::istreambuf_iterator(fs)), + (std::istreambuf_iterator())); fs.close(); return content; } @@ -49,7 +51,15 @@ protected: } const char* OpenccCommand() const { +#ifndef _MSC_VER return PROJECT_BINARY_DIR "/src/tools/opencc"; +#else +#ifdef NDEBUG + return PROJECT_BINARY_DIR "/src/tools/Release/opencc.exe"; +#else + return PROJECT_BINARY_DIR "/src/tools/Debug/opencc.exe"; +#endif +#endif } const char* InputDirectory() const { @@ -66,17 +76,17 @@ protected: return CMAKE_SOURCE_DIR "/data/config/"; } - string OutputFile(const char* config) const { - return string(OutputDirectory()) + config + ".out"; + std::string OutputFile(const char* config) const { + return std::string(OutputDirectory()) + config + ".out"; } - string AnswerFile(const char* config) const { - return string(AnswerDirectory()) + config + ".ans"; + std::string AnswerFile(const char* config) const { + return std::string(AnswerDirectory()) + config + ".ans"; } - string TestCommand(const char* config) const { - return OpenccCommand() + string("") + " -i " + InputDirectory() + config + - ".in" + " -o " + OutputFile(config) + " -c " + + std::string TestCommand(const char* config) const { + return OpenccCommand() + std::string("") + " -i " + InputDirectory() + + config + ".in" + " -o " + OutputFile(config) + " -c " + ConfigurationDirectory() + config + ".json"; } @@ -89,8 +99,8 @@ class ConfigurationTest : public CommandLineConvertTest, TEST_P(ConfigurationTest, Convert) { const char* config = GetParam(); ASSERT_EQ(0, 
system(TestCommand(config).c_str())); - const string& output = GetFileContents(OutputFile(config)); - const string& answer = GetFileContents(AnswerFile(config)); + const std::string& output = GetFileContents(OutputFile(config)); + const std::string& answer = GetFileContents(AnswerFile(config)); ASSERT_EQ(answer, output); }